-
Notifications
You must be signed in to change notification settings - Fork 3
/
laliga.py
99 lines (67 loc) · 2.54 KB
/
laliga.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/usr/bin/env python
# coding: utf-8
# In[1]:
import pandas as pd
import datetime
import logging
from ast import literal_eval
# Gate for the whole script: work is only done on the day after a matchday
# ends. Yesterday's date is formatted as MM/DD/YYYY and looked up in the
# "Date" column of LaLigaMatchday.csv; if it is absent the script logs that
# fact and stops before any scraping happens.
logging.basicConfig(
    filename="laliga.log",
    format='%(asctime)s %(message)s',
    filemode='a',
)
yesterday = (datetime.date.today() - datetime.timedelta(days=1)).strftime("%m/%d/%Y")
df_match_day = pd.read_csv('LaLigaMatchday.csv')
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
if yesterday not in df_match_day.Date.values:
    logger.info("not an end of matchday")
    # Raise SystemExit directly rather than calling exit(): exit() is a
    # helper injected by the site module for interactive sessions and is not
    # guaranteed to exist (e.g. under `python -S`). The exit status stays 0.
    raise SystemExit(0)
# In[81]:
#else part
# Index label of the row in LaLigaMatchday.csv whose Date equals yesterday.
# NOTE(review): if that CSV is read with pandas' default index this label is
# 0-based — confirm whether the log is meant to show a 1-based matchday.
matchday_no = df_match_day.index[df_match_day['Date'] == yesterday].to_list()[0]
logger.info("it is end of matchday number: %d", matchday_no)

# read_html returns a list of DataFrames, one per <table> matching `attrs`;
# the first match is used as the forecast table. header=2 selects which table
# row supplies the column names.
forecast_tables = pd.read_html(
    'https://projects.fivethirtyeight.com/soccer-predictions/la-liga/',
    attrs={'class': 'forecast-table'},
    header=2,
)
# Keep only the team label and the title-probability column. The explicit
# .copy() gives an independent frame, so the column assignments performed
# later in the script do not write into a slice of the scraped table
# (which would raise SettingWithCopyWarning).
df = forecast_tables[0][['team', 'win La Ligawin league']].copy()
#df.head()
# In[79]:
import re
def clean(team_name):
    """Strip the points suffix and all digits from a scraped team label.

    Every occurrence of the literal text " pts" is removed first, then every
    run of decimal digits, e.g. "Barcelona72 pts" -> "Barcelona".

    Args:
        team_name: Raw text of the team cell.

    Returns:
        The label without " pts" and without digits. All other characters,
        including any remaining whitespace, are left untouched.
    """
    # Raw-string patterns avoid the invalid "\ " escape sequence the previous
    # non-raw literal contained (a warning on current Python versions).
    without_points = re.sub(r" pts", "", team_name)
    return re.sub(r"[0-9]+", "", without_points)
# In[91]:
def merge_col(row):
    """Append the newest win-league value to a team's stored history.

    Args:
        row: Mapping-like row (for example a pandas Series) with two keys:
            'Win League', holding either a list of earlier values or a
            single scalar, and 'new column', holding the latest value.

    Returns:
        A new list: the earlier value(s) followed by
        ``int(row['new column'])``. A scalar 'Win League' is converted with
        ``int`` before being placed first; an existing list is copied as-is.

    Raises:
        ValueError, TypeError: If a scalar value cannot be converted by
            ``int``.
    """
    previous = row['Win League']
    # isinstance is the reliable type check; matching 'list' inside
    # str(type(...)) depends on how the type name happens to be spelled.
    if isinstance(previous, list):
        history = list(previous)  # copy so the row's own list is not mutated
    else:
        history = [int(previous)]
    history.append(int(row['new column']))
    return history
import re
# Normalise the scraped percentages (e.g. "45%", "<1%") to bare digit strings.
# regex=False makes the replacement literal on every pandas version: since
# pandas 2.0 str.replace treats the pattern literally by default, so the old
# patterns r'\%' and r'\<' would search for a backslash and strip nothing.
df['win La Ligawin league'] = df['win La Ligawin league'].str.replace('%', '', regex=False)
df['win La Ligawin league'] = df['win La Ligawin league'].str.replace('<', '', regex=False)
df = df.rename(columns={'2': 'rows', 'win La Ligawin league': 'Win League'})
df['team'] = df['team'].apply(clean)
df = df.sort_values(by=['team'])

import glob
filename = 'la liga.csv'
ispresent = glob.glob(filename)
if not ispresent:
    # First run: there is no history yet, so today's table becomes the file.
    df.to_csv(filename, index=False, encoding='utf-8-sig')
    # SystemExit instead of exit(): exit() is a site-module helper intended
    # for interactive use and may be absent. Exit status stays 0.
    raise SystemExit(0)

main_df = pd.read_csv(filename)
print(main_df.head())
# In[88]:
# Inner-join stored history with today's values on the team name; the
# overlapping 'Win League' columns come back suffixed _x (stored) and _y (new).
main_df = pd.merge(main_df, df, on='team')
main_df = main_df.rename(columns={'Win League_x': 'Win League', 'Win League_y': 'new column'})
# The stored column is either a list rendered as text ("[45, 50]") or, on the
# run right after the file was created, plain numbers that read_csv has
# already parsed. literal_eval rejects non-string input, so only strings are
# evaluated; other values pass through for merge_col's scalar branch.
main_df['Win League'] = main_df['Win League'].apply(
    lambda stored: literal_eval(stored) if isinstance(stored, str) else stored
)
main_df['Win League'] = main_df.apply(merge_col, axis=1)
main_df = main_df.drop(['new column'], axis=1)
main_df.to_csv(filename, index=False, encoding='utf-8-sig')
# Log the file that was actually written (the old message named a different
# file, "laliga.csv").
logger.info("Done writing to %s", filename)