In [1]:
import pandas as pd 
import numpy as np

In [4]:
# Load the raw match results.
df = pd.read_csv('cleardataset/premier-league-matches.csv')

# Row masks, checked in order by np.select (the first match wins):
#   1. both sides scored the same number of goals (draw)
#   2. FTR says 'H' and the home side scored more (home win)
#   3. FTR says 'A' and the away side scored more (away win)
conditions = [
    df['HomeGoals'].eq(df['AwayGoals']),
    df['FTR'].eq('H') & df['HomeGoals'].gt(df['AwayGoals']),
    df['FTR'].eq('A') & df['AwayGoals'].gt(df['HomeGoals']),
]

# Value assigned for each mask above, position by position.
choices = [1, 3, 3]

# Rows matching none of the masks (e.g. FTR disagreeing with the goal counts)
# get 0.
match_points = np.select(conditions, choices, default=0)

# Attach the per-match value and order the table by it, highest first.
df = df.assign(Points=match_points).sort_values('Points', ascending=False)

print(df)

      Season_End_Year  Wk        Date           Home  HomeGoals  AwayGoals  \
0                1993   1  1992-08-15  Coventry City          2          1   
7460             2011  38  2011-05-22      Tottenham          2          1   
7493             2012   3  2011-08-28      West Brom          0          1   
7494             2012   3  2011-08-28  Newcastle Utd          2          1   
7496             2012   4  2011-09-10         Bolton          0          5   
...               ...  ..         ...            ...        ...        ...   
4245             2003  21  2002-12-28        Everton          0          0   
8282             2014   6  2013-09-28      Tottenham          1          1   
6411             2009   9  2008-10-26     Portsmouth          1          1   
1715             1996  34  1996-04-06       West Ham          1          1   
7601             2012  14  2011-12-03            QPR          1          1   

                 Away FTR  Points  
0       Middlesbrough   H  

In [5]:
# Write the points-annotated match table to disk without the row index.
# 'utf-8-sig' emits a UTF-8 byte-order mark at the start of the file.
output_path = 'dataset/results_with_points.csv'
df.to_csv(
    output_path,
    encoding='utf-8-sig',
    index=False,
)

In [19]:
# 2) Points earned by the home and the away side of a single match
def points_per_side(row):
    """Return the points each side earns from one match row.

    Parameters
    ----------
    row : mapping-like
        Must support ``row['HomeGoals']`` and ``row['AwayGoals']``.

    Returns
    -------
    pd.Series
        Index ``['HomePts', 'AwayPts']``: 3/0 when the home side scored
        more, 0/3 when the away side scored more, and 1/1 otherwise.
    """
    home_goals = row['HomeGoals']
    away_goals = row['AwayGoals']

    if home_goals > away_goals:        # home win
        home_pts, away_pts = 3, 0
    elif home_goals < away_goals:      # away win
        home_pts, away_pts = 0, 3
    else:                              # neither comparison holds: draw
        home_pts, away_pts = 1, 1

    return pd.Series({'HomePts': home_pts, 'AwayPts': away_pts})

# Per-match points for each side, computed column-wise with the same rule as
# points_per_side: home win -> 3/0, away win -> 0/3, anything else -> 1/1.
# Doing this on whole columns avoids building one pd.Series per match through
# df.apply(..., axis=1).
home_won = df['HomeGoals'] > df['AwayGoals']
away_won = df['HomeGoals'] < df['AwayGoals']
df['HomePts'] = np.select([home_won, away_won], [3, 0], default=1)
df['AwayPts'] = np.select([home_won, away_won], [0, 3], default=1)

# 3) Stack the home and away views into one long table and total the points
#    per team and season.
home = (
    df[['Season_End_Year', 'Home', 'HomePts']]
      .rename(columns={'Home': 'Team', 'HomePts': 'Points'})
)
away = (
    df[['Season_End_Year', 'Away', 'AwayPts']]
      .rename(columns={'Away': 'Team', 'AwayPts': 'Points'})
)

points_long = pd.concat([home, away], ignore_index=True)

team_season_pts = (
    points_long
      .groupby(['Team', 'Season_End_Year'], as_index=False)['Points']
      .sum()
)

# 4) Pivot: rows = Team, columns = Season_End_Year
pivot = (
    team_season_pts
      .pivot(index='Team', columns='Season_End_Year', values='Points')
)

# 5) Force a fixed set of season columns, fill the gaps with 0, and sort.
all_seasons = list(range(1992, 2026))            # 1992 .. 2025 inclusive

pivot = (
    pivot
      .reindex(columns=all_seasons)   # listed seasons only; absent ones become NaN
      .fillna(0)                      # team has no rows for that season -> 0
      .astype(int)                    # back to integers after the NaN round trip
      .sort_index(axis=1)             # seasons in ascending order
      .sort_index()                   # teams in alphabetical order
)

print(pivot)
pivot.to_csv('dataset/season_points_by_team.csv')

Season_End_Year  1992  1993  1994  1995  1996  1997  1998  1999  2000  2001  \
Team                                                                          
Arsenal             0    56    71    51    63    68    78    78    73    70   
Aston Villa         0    74    57    48    63    61    57    55    58    54   
Barnsley            0     0     0     0     0     0    35     0     0     0   
Birmingham City     0     0     0     0     0     0     0     0     0     0   
Blackburn           0    71    84    89    61    42    58    35     0     0   
Blackpool           0     0     0     0     0     0     0     0     0     0   
Bolton              0     0     0     0    29     0    40     0     0     0   
Bournemouth         0     0     0     0     0     0     0     0     0     0   
Bradford City       0     0     0     0     0     0     0     0    36    26   
Brentford           0     0     0     0     0     0     0     0     0     0   
Brighton            0     0     0     0     0     0 

In [20]:
# Reload the team x season points table written by the previous step.
df2 = pd.read_csv('dataset/season_points_by_team.csv')

# All-time total per team: add up the season columns '1992' through '2025'
# (label-based slice, both ends included).
df2['sum'] = df2.loc[:, '1992':'2025'].sum(axis=1)

# Ranking table ordered by total points, highest first.
# Note: despite the name, no row limit is applied, so every team is kept.
top10 = (
    df2[['Team', 'sum']]
       .sort_values('sum', ascending=False)
       .reset_index(drop=True)
       .rename(columns={'Team': '팀 이름', 'sum': '총 승점'})
)

top10.index = top10.index + 1  # make the rank start at 1 instead of 0
print(top10)

               팀 이름  총 승점
1    Manchester Utd  2441
2           Arsenal  2225
3           Chelsea  2182
4         Liverpool  2176
5         Tottenham  1847
6   Manchester City  1718
7           Everton  1610
8     Newcastle Utd  1481
9       Aston Villa  1419
10         West Ham  1298
11      Southampton  1088
12        Blackburn   970
13   Leicester City   821
14     Leeds United   820
15           Fulham   692
16    Middlesbrough   664
17       Sunderland   618
18   Crystal Palace   609
19           Bolton   575
20        West Brom   490
21       Stoke City   457
22    Coventry City   409
23     Norwich City   402
24   Sheffield Weds   392
25        Wimbledon   391
26           Wolves   389
27     Charlton Ath   361
28   Wigan Athletic   331
29          Burnley   325
30     Swansea City   312
31              QPR   308
32       Portsmouth   302
33  Birmingham City   301
34          Watford   285
35  Nott'ham Forest   277
36     Derby County   274
37         Brighton   271
38      Bour