# Read parquet files for Ligue 1

#### This notebook is written in SQL

In [0]:
-- Load the Ligue 1 partition of the silver container into a Delta table.
-- CREATE OR REPLACE swaps the table contents in a single Delta transaction,
-- so the table never disappears between a DROP and the following CREATE and
-- a failed load leaves the previous version in place.
CREATE OR REPLACE TABLE football.europeanfootball.ligue1
USING DELTA
AS
SELECT *
FROM PARQUET.`abfss://silver@efsa.dfs.core.windows.net/League=Ligue 1/`;


# Drop rows with a null date

In [0]:
-- Remove every row that has no match date.
DELETE FROM football.europeanfootball.ligue1
WHERE date IS NULL;

# Drop Duplicate Records

In [0]:
-- Lists the surplus copies of each match: rows are numbered within groups that
-- share all six match columns, and every row after the first is returned.
-- The ORDER BY inside the window is arbitrary because all rows of a group are
-- identical on the selected columns.
CREATE OR REPLACE TEMPORARY VIEW duplicates_to_remove AS
WITH numbered_matches AS (
    SELECT
        Date,
        HomeTeam,
        AwayTeam,
        HomeGoals,
        AwayGoals,
        Result,
        ROW_NUMBER() OVER (
            PARTITION BY Date, HomeTeam, AwayTeam, HomeGoals, AwayGoals, Result
            ORDER BY Date
        ) AS row_num
    FROM football.europeanfootball.ligue1
)
SELECT
    Date,
    HomeTeam,
    AwayTeam,
    HomeGoals,
    AwayGoals,
    Result
FROM numbered_matches
WHERE row_num > 1

In [0]:
-- Step 1: Build a de-duplicated copy of the match rows in a staging table.
-- Only the six listed match columns are carried over.
CREATE OR REPLACE TABLE football.europeanfootball.football_matches_deduplicated
USING DELTA
AS
SELECT DISTINCT
    Date,
    HomeTeam,
    AwayTeam,
    HomeGoals,
    AwayGoals,
    Result
FROM football.europeanfootball.ligue1;

-- Step 2: Count records to verify
SELECT COUNT(*) AS original_count
FROM football.europeanfootball.ligue1;

SELECT COUNT(*) AS deduplicated_count
FROM football.europeanfootball.football_matches_deduplicated;

-- Step 3: If the counts are what you expect, replace the original table.
-- NOTE(review): when the whole cell is run, this step executes regardless of
-- the counts above; run the statements individually to actually inspect them.
-- CREATE OR REPLACE overwrites ligue1 in one Delta transaction, so the original
-- rows are not lost if the swap fails part-way (unlike DROP followed by RENAME).
CREATE OR REPLACE TABLE football.europeanfootball.ligue1
USING DELTA
AS
SELECT
    Date,
    HomeTeam,
    AwayTeam,
    HomeGoals,
    AwayGoals,
    Result
FROM football.europeanfootball.football_matches_deduplicated;

-- The staging table is no longer needed once ligue1 holds the clean rows.
DROP TABLE IF EXISTS football.europeanfootball.football_matches_deduplicated;

# Update season data

In [0]:
-- Add a text column that will hold the season label (filled by a later UPDATE).
ALTER TABLE football.europeanfootball.ligue1
ADD COLUMN (season STRING);

In [0]:
-- Label every match with its season as 'YYYY/YYYY'.
-- Matches played from August onwards belong to the season starting that year;
-- matches played January-July belong to the season that started the year before.
-- The missing WHERE clause is intentional: every row gets a season.
UPDATE football.europeanfootball.ligue1
SET season = CASE
        WHEN month(date) >= 8
            THEN concat(CAST(year(date) AS STRING), '/', CAST(year(date) + 1 AS STRING))
        ELSE concat(CAST(year(date) - 1 AS STRING), '/', CAST(year(date) AS STRING))
    END;

# Get every goal scored by each club in each season
1. First Get Home Goals
2. Get Away Goals
3. Get Total Goals per season

In [0]:
-- Goals scored per club and season (home and away combined).
-- Each match contributes two rows: the home side with its home goals and the
-- away side with its away goals. Aggregating the UNION ALL, rather than
-- inner-joining separate home and away totals, keeps clubs that have only home
-- or only away fixtures in a season (e.g. early in a partially loaded season).
-- OR REPLACE makes the cell re-runnable within the same session.
CREATE OR REPLACE TEMPORARY VIEW totalhomegoals AS
WITH goals_scored AS (
    SELECT
        season,
        hometeam AS team,
        homegoals AS goals
    FROM football.europeanfootball.ligue1
    UNION ALL
    SELECT
        season,
        awayteam AS team,
        awaygoals AS goals
    FROM football.europeanfootball.ligue1
)
SELECT
    season,
    team,
    SUM(goals) AS TotalGoalsScored
FROM goals_scored
GROUP BY season, team

# Get Goals Against

In [0]:
-- Goals conceded per club and season (home and away combined).
-- The home side concedes the away goals and the away side concedes the home
-- goals. Aggregating the UNION ALL, rather than inner-joining separate home and
-- away totals, keeps clubs that have only home or only away fixtures in a
-- season. OR REPLACE makes the cell re-runnable within the same session.
CREATE OR REPLACE TEMPORARY VIEW totalawaygoals AS
WITH goals_conceded AS (
    SELECT
        season,
        hometeam AS team,
        awaygoals AS goals
    FROM football.europeanfootball.ligue1
    UNION ALL
    SELECT
        season,
        awayteam AS team,
        homegoals AS goals
    FROM football.europeanfootball.ligue1
)
SELECT
    season,
    team,
    SUM(goals) AS TotalGoalsAgainst
FROM goals_conceded
GROUP BY season, team

# Simple Task

- Get the goal difference in a new column

In [0]:
-- One row per club and season with goals scored and goals conceded side by side.
-- OR REPLACE lets the cell be re-run without a "view already exists" error.
CREATE OR REPLACE TEMPORARY VIEW combinedgoals AS
SELECT
    scored.season,
    scored.team,
    scored.TotalGoalsScored,
    conceded.TotalGoalsAgainst
FROM totalhomegoals AS scored
INNER JOIN totalawaygoals AS conceded
    ON scored.team = conceded.team
    AND scored.season = conceded.season

In [0]:
-- Adds GoalDifference (scored minus conceded) to the combined goals per club
-- and season. OR REPLACE lets the cell be re-run in the same session.
CREATE OR REPLACE TEMPORARY VIEW goaldifference AS
SELECT
    season,
    team,
    TotalGoalsScored,
    TotalGoalsAgainst,
    TotalGoalsScored - TotalGoalsAgainst AS GoalDifference
FROM combinedgoals

# Next task: get each club's wins, draws and losses (used later to compute points)

In [0]:
-- Home record per club and season. From the home side's point of view a
-- result of 'H' is a win, 'D' a draw and 'A' a loss.
-- OR REPLACE lets the cell be re-run in the same session.
CREATE OR REPLACE TEMPORARY VIEW homewins AS
SELECT
    season,
    hometeam AS team,
    SUM(CASE WHEN result = 'H' THEN 1 ELSE 0 END) AS wins,
    SUM(CASE WHEN result = 'D' THEN 1 ELSE 0 END) AS draw,
    SUM(CASE WHEN result = 'A' THEN 1 ELSE 0 END) AS loses
FROM football.europeanfootball.ligue1
GROUP BY season, hometeam

In [0]:
-- Away record per club and season. From the away side's point of view a
-- result of 'A' is a win, 'D' a draw and 'H' a loss.
-- OR REPLACE lets the cell be re-run in the same session.
CREATE OR REPLACE TEMPORARY VIEW awaywins AS
SELECT
    season,
    awayteam AS team,
    SUM(CASE WHEN result = 'A' THEN 1 ELSE 0 END) AS wins,
    SUM(CASE WHEN result = 'D' THEN 1 ELSE 0 END) AS draw,
    SUM(CASE WHEN result = 'H' THEN 1 ELSE 0 END) AS loses
FROM football.europeanfootball.ligue1
GROUP BY season, awayteam

In [0]:
-- Full-season record per club: home and away records are stacked and summed.
-- Columns are listed explicitly because UNION ALL matches columns by position,
-- not by name. OR REPLACE lets the cell be re-run in the same session.
CREATE OR REPLACE TEMPORARY VIEW points AS
WITH home_and_away AS (
    SELECT season, team, wins, draw, loses
    FROM homewins
    UNION ALL
    SELECT season, team, wins, draw, loses
    FROM awaywins
)
SELECT
    season,
    team,
    SUM(wins) AS wins,
    SUM(draw) AS draw,
    SUM(loses) AS loses
FROM home_and_away
GROUP BY season, team

# Join goal difference table and total wins table

In [0]:
-- Goal statistics and win/draw/loss record per club and season in one row.
-- OR REPLACE lets the cell be re-run in the same session.
CREATE OR REPLACE TEMPORARY VIEW combinedview AS
SELECT
    g.season,
    g.team,
    g.TotalGoalsScored,
    g.TotalGoalsAgainst,
    g.GoalDifference,
    p.wins,
    p.draw,
    p.loses
FROM goaldifference AS g
INNER JOIN points AS p
    ON g.team = p.team
    AND g.season = p.season

In [0]:
-- Preview the joined goal and result statistics.
SELECT *
FROM combinedview

# Get total points in the season

In [0]:
-- Adds Points per club and season: 3 per win plus 1 per draw.
-- OR REPLACE lets the cell be re-run in the same session.
CREATE OR REPLACE TEMPORARY VIEW total_points AS
SELECT
    season,
    team,
    TotalGoalsScored,
    TotalGoalsAgainst,
    GoalDifference,
    wins,
    draw,
    loses,
    (wins * 3) + draw AS Points
FROM combinedview

# Create Final Ligue 1 Table

In [0]:
-- Materialise the league table: clubs are ranked within each season by points,
-- then goal difference. DENSE_RANK gives clubs tied on both the same Position
-- without leaving gaps afterwards.
-- CREATE OR REPLACE (instead of IF NOT EXISTS) guarantees the table reflects
-- this run; IF NOT EXISTS would silently keep a stale table left behind by an
-- earlier run that stopped before the table was renamed.
CREATE OR REPLACE TABLE football.europeanfootball.finalligue1table
USING DELTA
AS
SELECT
    season,
    team,
    TotalGoalsScored,
    TotalGoalsAgainst,
    GoalDifference,
    wins,
    draw,
    loses,
    Points,
    DENSE_RANK() OVER (
        PARTITION BY season
        ORDER BY Points DESC, GoalDifference DESC
    ) AS Position
FROM total_points

In [0]:
-- Publish the league table under the ligue1 name.
-- NOTE(review): this overwrites the match-level rows in ligue1 with the
-- per-season standings; confirm nothing downstream still needs the raw matches.
-- CREATE OR REPLACE overwrites ligue1 in one Delta transaction, so the existing
-- table is not lost if the swap fails part-way (unlike DROP followed by RENAME).
CREATE OR REPLACE TABLE football.europeanfootball.ligue1
USING DELTA
AS
SELECT *
FROM football.europeanfootball.finalligue1table;

-- The staging table is no longer needed once ligue1 holds the standings.
DROP TABLE IF EXISTS football.europeanfootball.finalligue1table;

In [0]:
-- Show the final standings, season by season and top of the table first.
-- Without ORDER BY the row order of the result is not guaranteed.
SELECT *
FROM football.europeanfootball.ligue1
ORDER BY season, Position