In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import seaborn as sns
from termcolor import colored
import plotly.express as px
import plotly.graph_objects as go

# Notebook-wide palette; the plots below pick entries by position
# (for example color[0] is used as the figure / axes background).
color = [
    "#E9F4F3",
    "#3CD7CB",
    "#165D71",
    "#EF6D2C",
    "#F08E3B",
    "#5E2533",
]

 <p style="background-color:#F08E3B;font-family:newtimeroman;color:#5E2533;font-size:200%;text-align:center;border-radius:10px 10px;">
    Game Cities</p>

In [None]:

# --- Game cities: host cities and games per state, drawn as a tile map -------------

# Inputs: the city lookup table, the city of every game, and a helper file that
# provides a tile position (Row / Column) per state abbreviation.
cities = pd.read_csv("../input/womens-march-mania-2022/WDataFiles_Stage1/Cities.csv", encoding = "utf-8")
game_city = pd.read_csv("../input/womens-march-mania-2022/WDataFiles_Stage1/WGameCities.csv", encoding = "utf-8")
hex_plot = pd.read_csv('../input/newhelpingfile/data2.csv')

# Attach the city attributes to every game; merge() joins on the columns both tables share.
df = game_city.merge(cities)

# One row per state: number of distinct host cities and number of game rows.
df = df.groupby(['State']).agg({'City': 'nunique', 'Season':'count'}).reset_index()
# Both frames carry a 'State' column, so after this left join they show up as
# State_x (from the aggregated games) and State_y (from the helper file).
df = df.merge(hex_plot, right_on='Abbreviation', left_on='State', how='left')

# States without a tile in the helper file are parked to the right of the map,
# three per column starting at column 17, on rows 1.0 / 2.5 / 4.0.
# The positions are derived from the number of unmatched states instead of a
# fixed list of six values, so the cell keeps working if that number changes.
off_map = df.Row.isna()
if off_map.any():
    slot = np.arange(int(off_map.sum()))
    df.loc[off_map, 'Column'] = 17 + slot // 3
    df.loc[off_map, 'Row'] = np.array([1.0, 2.5, 4.0])[slot % 3]

# Unmatched states have no display name either; fall back to the raw 'State' value.
unnamed = df.State_y.isna()
req = df.loc[unnamed, 'State_x'].tolist()
df.loc[unnamed, 'State_y'] = req

# Hover text per tile: state name, number of host cities, number of games.
hover_text = [
    "<br>  ".join([
        'State - '+ state,
        'No. of Cities(in which game played) - '+str(n_cities),
        'Total Games(from 2010 to 2022) - ' "{:,}".format(int(n_games)),
    ])
    for state, n_cities, n_games in zip(df.State_y, df['City'], df['Season'])
]

fig = go.Figure()

fig.add_trace(go.Scatter(
        x=df.Column, y=-1*df.Row,  # rows are negated so that row 1 is drawn at the top
        text=hover_text,
        marker_symbol=14,  # NOTE(review): 14 should be the hexagon in plotly's symbol table - confirm
        marker_line_color="white",
        marker_color=df.Season,  # tiles are shaded by number of games
        hovertemplate = '&nbsp; %{text}</b>&nbsp;<extra></extra>',
        marker_colorscale=[[0, color[1]], [1, color[2]]],
        marker_line_width=0.5, marker_size=43, mode='markers'))

# Hide both axes entirely: the figure is a map, not a chart.
hidden_axis = dict(fixedrange = True,showgrid = False,showticklabels=False, showline=False,zeroline = False)
fig.update_layout(clickmode='event+select',showlegend = False,
                  xaxis = dict(hidden_axis),
                  yaxis = dict(hidden_axis),
                  hoverlabel=dict(bgcolor="white",font_size=15,  font_family="Rockwell"),
                  margin=dict(t=50, l=30, r=30, b=30, pad = 0),
                  height=450, width=890,
                  plot_bgcolor=color[0],paper_bgcolor=color[0],
                  title=dict(text='Cities & Total Game Played in a State', font_size=30, font_color=color[4], y=0.95, x=0.5)
                 )
# Caption placed next to the parked tiles on the right-hand side.
fig.add_annotation(
        x=1.02, y=0.89, xref="paper", yref="paper",
        text="""
            These State data I not found in USA.
        """,
        showarrow=False,font=dict(family="serif", size=10, color=color[5]),align="left")

fig.show()

 <p style="background-color:#F08E3B;font-family:newtimeroman;color:#5E2533;font-size:200%;text-align:center;border-radius:10px 10px;">
    Teams</p>

In [None]:
# --- Teams: which words occur most often in team names? ---------------------------
teams = pd.read_csv("../input/womens-march-mania-2022/WDataFiles_Stage1/WTeams.csv", encoding = "utf-8")
# Every space-separated token of every team name, as one long Series.
words = pd.Series(" ".join(teams.TeamName.tolist()).split(" "))

fig = plt.figure(figsize=(18,5))
fig.patch.set_facecolor(color[0])

# Headline assembled from several text fragments so the numbers can use a different colour.
# The counts shown (370, 356) are fixed strings, not computed from the data.
headline_style = dict(fontsize=30, fontweight='bold', fontfamily='serif')
headline_parts = [
    (0.1, 'Total Teams - ', color[3]),
    (0.28, '370', color[5]),
    (0.34, ', out of which ', color[3]),
    (0.525, '356', color[5]),
    (0.575, ' teams currently in Division-I', color[3]),
]
for x_pos, fragment, shade in headline_parts:
    fig.text(x_pos, 1.15, fragment, color=shade, **headline_style)
fig.text(0.36,1.08, 'For participation in Regular season, team should be in Division-I', fontsize=20, fontweight='bold', fontfamily='serif', color=color[2])

ax = fig.add_subplot(111)
ax.patch.set_facecolor(color[0])
ax.patch.set_alpha(1.0)

# Bar chart of the 15 most frequent tokens.
words.value_counts().head(15).plot(kind='bar', ax=ax)
# Recolour every bar whose left edge lies left of x = 0.
for bar in (patch for patch in ax.patches if patch.get_x() < 0):
    bar.set_color(color[1])
plt.xticks(rotation=45)

# In-plot annotations (fixed strings).
ax.text(0.3, 50, '70 teams have St in their name', fontsize=18, fontweight='bold', fontfamily='serif', color=color[1])
ax.text(7, 60, 'Which word is common in team name?', fontsize=20, fontweight='bold', fontfamily='serif', color=color[4])

plt.show()

 <p style="background-color:#F08E3B;font-family:newtimeroman;color:#5E2533;font-size:200%;text-align:center;border-radius:10px 10px;">
    Seasons</p>

In [None]:
# --- Seasons: weekday / month of the season start and of Selection Monday ---------
seasons = pd.read_csv("../input/womens-march-mania-2022/WDataFiles_Stage1/WSeasons.csv", encoding = "utf-8")
display(seasons.head(2))

# One row of four panels: two for DayZero (orange), two for Selection Monday (maroon).
bg_color = color[0]
fig = plt.figure(figsize = (12,3), dpi=150)
fig.patch.set_facecolor(bg_color)
gs = fig.add_gridspec(1,4)
gs.update(wspace=0.35, hspace=0.27)
ax0 = fig.add_subplot(gs[0,0])
ax1 = fig.add_subplot(gs[0,1])
ax2 = fig.add_subplot(gs[0,2])
ax3 = fig.add_subplot(gs[0,3])

# Headings and captions above each pair of panels (figure coordinates).
fig.text(0.13,1.15, 'DayZero ', fontsize=20, fontweight='bold', fontfamily='serif', color=color[3])
fig.text(0.13,1.,"the start of the regular season, for last 25 season's always starts ", fontsize=9, fontfamily='serif', color=color[3])
fig.text(0.13,0.95,"on monday of November or October", fontsize=9, fontfamily='serif', color=color[3])
fig.text(0.53,1.15, 'Selection Monday ', fontsize=20, fontweight='bold', fontfamily='serif', color=color[5])
fig.text(0.53,1.,"*Selection of best 64, as name suggest held on monday of March", fontsize=9, fontfamily='serif', color=color[5])

# Parse DayZero once and reuse it for every derived series.
day_zero = pd.to_datetime(seasons.DayZero)
starting_day = day_zero.dt.strftime('%A')
starting_month = day_zero.dt.strftime('%b')
# Selection day is taken to be DayZero + 133 days for every season.
selection_date = day_zero + pd.Timedelta(days=133)
selection_day = selection_date.dt.strftime('%A')
selection_month = selection_date.dt.strftime('%b')

# One bar chart of value counts per panel.
panels = [
    (ax0, starting_day, color[3]),
    (ax1, starting_month, color[3]),
    (ax2, selection_day, color[5]),
    (ax3, selection_month, color[5]),
]
for ax, labels, bar_color in panels:
    labels.value_counts().plot(kind='bar', color=bar_color, ax=ax)

# plt.show() instead of fig.show(): Figure.show() warns when the notebook runs on a
# non-GUI backend, and plt.show() matches the other plotting cells.
plt.show()

In [None]:
# --- Regions: how often does each region name appear across all seasons? ----------

# Flatten the four region columns season by season (W, X, Y, Z for row 0, then row 1, ...).
region_columns = ('RegionW', 'RegionX', 'RegionY', 'RegionZ')
regions = [
    seasons.loc[row, column]
    for row in range(len(seasons))
    for column in region_columns
]

data = pd.Series(regions)

fig = plt.figure(figsize=(18,5))
fig.text(0.13,1.15, 'RegionW, RegionX, Region Y, Region Z - ', fontsize=20, fontweight='bold', fontfamily='serif', color=color[3])
fig.text(0.2,1.07, "by convention, the four regions in the final tournament are always named W, X, Y, and Z.", fontsize=15, fontfamily='serif', color=color[5])

# Semi-transparent figure background, fully opaque axes background.
fig.patch.set_facecolor(color[0])
fig.patch.set_alpha(0.6)
ax = fig.add_subplot(111)
ax.patch.set_facecolor(color[0])
ax.patch.set_alpha(1.0)

# Five most frequent region names.
data.value_counts().iloc[:5].plot(kind='bar', ax=ax)
plt.xticks(rotation=45)
# Fixed-text annotation; the count it quotes is not computed here.
ax.text(-0.4,8, '"East" Region came 7 times in final tournament in last 25 seasons', fontsize=12, fontfamily='serif', color=color[2])
plt.show()



 <p style="background-color:#F08E3B;font-family:newtimeroman;color:#5E2533;font-size:200%;text-align:center;border-radius:10px 10px;">
    NCAATourneySeeds</p>

In [None]:
# --- Tournament seeds: split the seed code into region letter and seed number -----
tseed = pd.read_csv("../input/womens-march-mania-2022/WDataFiles_Stage1/WNCAATourneySeeds.csv", encoding = "utf-8")
display(tseed.head(2))
print('Data consist Team with corresponding region and seed for particular seasons.\nWe can extract Region and Seed')

# First character of the seed code is the region, the remainder is the seed number
# (kept as text). The vectorised .str accessor replaces apply(lambda ...) and leaves
# a missing seed as missing instead of raising.
tseed['Region'] = tseed.Seed.str[0]
tseed['Seed_number'] = tseed.Seed.str[1:]
display(tseed.head(2))

print('\n So for any season there will be 16 team from each region. Lets check 2021')
# Number of seeded teams per region in the 2021 season.
display(tseed.query('Season == 2021').groupby(['Region']).size())

 <p style="background-color:#F08E3B;font-family:newtimeroman;color:#5E2533;font-size:200%;text-align:center;border-radius:10px 10px;">
    RegularSeasonCompactResults</p>

In [None]:
# --- Regular season compact results: games, teams, game days, games per day -------
rs_cr = pd.read_csv("../input/womens-march-mania-2022/WDataFiles_Stage1/WRegularSeasonCompactResults.csv", encoding = "utf-8")
rs_cr = rs_cr.sort_values(['Season', 'DayNum'])
display(rs_cr.sample(3))

# Calendar date of the last game day covered for the newest season: DayZero of the
# last row of `seasons` (loaded in an earlier cell) plus 98 days. 98 is hard-coded
# as the last DayNum available in the data.
latest_rs_date_2022 = (pd.to_datetime(seasons.DayZero.iloc[-1]) + pd.Timedelta(days=98)).strftime("%Y-%m-%d")
# Footnote shown on two of the panels.
partial_season_note = f'*In 2022 Season data is upto {latest_rs_date_2022},\n that means 35 days match left'

print('Size of Table-', rs_cr.shape)

# 2 x 2 grid of line charts.
bg_color = color[0]
fig = plt.figure(figsize = (12,5), dpi=150)
fig.patch.set_facecolor(bg_color)
gs = fig.add_gridspec(2,2)
gs.update(wspace=0.55, hspace=0.57)
ax0 = fig.add_subplot(gs[0,0])
ax1 = fig.add_subplot(gs[0,1])
ax2 = fig.add_subplot(gs[1,0])
ax3 = fig.add_subplot(gs[1,1])

fig.text(0.1,1.15, 'Regular Season -', fontsize=20, fontweight='bold', fontfamily='Tahoma', color=color[5])
fig.text(0.35, 1.15, "All games which were played on DayNum=132 or earlier.", fontsize=10, fontfamily='serif', color=color[4])
fig.text(0.35, 1.05,"`All games played before Selection Monday will show up here whether it was a pre-season \ntournament, a non-conference game, a regular conference game, a conference \ntournament game, or whatever.`", fontsize=10, fontfamily='serif', color=color[4])

# Per-season aggregates, computed once and reused by the panels below.
data = rs_cr.groupby('Season').size()                            # games per season
days_per_season = rs_cr.groupby('Season')['DayNum'].nunique()    # distinct game days per season

# Panel 1: games per season, with the 2020-2024 span shaded.
ax=ax0
data.plot(kind='line', ax=ax)
ax.fill_between(y1=[5400, 5400],x=[2020, 2024],y2=[3400, 3400], alpha=0.2, color=color[2])
ax.text(1997, 5500, 'Total Games in RS', fontsize=12, fontweight='bold', fontfamily='serif', color=color[3])
ax.text(2008, 4500, 'Covid Impact - \nGames Drastically decreases', fontsize=8, fontfamily='serif', color=color[2])
ax.text(2008,4000, partial_season_note, fontsize=5, fontfamily='serif')

ax.set_ylim([3400, 5400])
ax.set_xlim([1997, 2024])

# Panel 2: distinct teams per season, counting a team whether it won or lost.
ax=ax1
tab1 = rs_cr[['Season', 'WTeamID']].rename(columns={'Season': 'season', 'WTeamID': 'team'})
tab2 = rs_cr[['Season', 'LTeamID']].rename(columns={'Season': 'season', 'LTeamID': 'team'})
ch1 = pd.concat([tab1, tab2], axis=0)
ch1.groupby('season')['team'].nunique().plot(kind='line', ax=ax, zorder=1)
# The highlighted 2022 point and its label use the fixed value 356, not a computed one.
ax.scatter(x=[2022], y=[356], color=color[5], zorder=3)
ax.text(1997, 363, 'Total Teams', fontsize=12, fontweight='bold', fontfamily='serif', color=color[3])
ax.text(2014, 330, 'IN 2022, 356 Team Participated', fontsize=5, fontfamily='serif')

# Panel 3: distinct game days per season.
ax=ax2
days_per_season.plot(kind='line', ax=ax)
ax.text(1997, 135, 'Total Days in which all Games were played', fontsize=12, fontweight='bold', fontfamily='serif', color=color[3])
ax.text(2008,100, partial_season_note, fontsize=5, fontfamily='serif')

# Panel 4: average games per game day, rounded to whole games.
ax=ax3
(data/days_per_season).round(0).plot(kind='line', ax=ax)
ax.text(1997, 45.5, 'Games per Day', fontsize=12, fontweight='bold', fontfamily='serif', color=color[3])

fig.text(0.15,-0.05, 'Insight -', fontsize=10, fontweight='bold', fontfamily='serif', color=color[5])
fig.text(0.15, -0.19,"""
* Total Games in Regular Seasons Inceasing year on year. That can be correlate with increase in teams.
* SO Games of 2020 was completed before march. After that COVID IMPACT can be seen in NCAA. 
* In 2022, we found heighest team participation, means total games will be heighest before selection monday.
""", fontsize=10, fontfamily='serif', color=color[4])
# plt.show() instead of fig.show(): Figure.show() warns when the notebook runs on a
# non-GUI backend, and plt.show() matches the other plotting cells.
plt.show()

In [None]:
# Mean points per (Season, team, WLoc): once from the winning side, once from the losing side.
# NOTE(review): WLoc is the grouping key for both sides; it presumably describes the
# winner's venue, so the LScore rows would be keyed by the opponent's location - confirm.
mean_winning_score = rs_cr.groupby(['Season', 'WTeamID', 'WLoc'])['WScore'].mean().to_frame()
mean_losing_score = rs_cr.groupby(['Season', 'LTeamID', 'WLoc'])['LScore'].mean().to_frame()
# axis=1 places the two averages side by side, aligned on the group index.
pd.concat([mean_winning_score, mean_losing_score], axis=1)

 <p style="background-color:#F08E3B;font-family:newtimeroman;color:#5E2533;font-size:200%;text-align:center;border-radius:10px 10px;">
    NCAATourneyCompactResults.csv</p>

In [None]:
# --- NCAA tournament compact results: games per tournament day --------------------
t_cr = pd.read_csv("../input/womens-march-mania-2022/WDataFiles_Stage1/WNCAATourneyCompactResults.csv", encoding = "utf-8")
print(t_cr.shape)
display(t_cr.head(2))

# Single-panel figure on the shared background colour.
bg_color = color[0]
fig = plt.figure(figsize = (7,3), dpi=150)
fig.patch.set_facecolor(bg_color)
gs = fig.add_gridspec(1,1)
gs.update(wspace=0.05, hspace=0.07)
ax0 = fig.add_subplot(gs[0,0])
ax0.patch.set_facecolor(color[0])

# Headline built from fragments so the numbers can be bold (fixed strings).
fig.text(0.1,1.15, 'Women NCAA Tournament', fontsize=25, fontweight='bold', fontfamily='Tahoma', color=color[5])
subtitle_parts = [
    (0.1, "64", dict(fontsize=20, fontweight='bold')),
    (0.16, "Teams & ", dict(fontsize=18)),
    (0.32, "10", dict(fontsize=20, fontweight='bold')),
    (0.38, "Action pact Days", dict(fontsize=18)),
]
for x_pos, fragment, text_style in subtitle_parts:
    fig.text(x_pos, 1.05, fragment, fontfamily='DejaVu Sans', color=color[4], **text_style)

# Number each season's game days 1, 2, 3, ... in DayNum order (dense rank within the season).
t_cr['Day_of_tournament'] = t_cr.groupby(['Season'])['DayNum'].rank(method='dense')

# Games per tournament day for seasons 1998 and 2021, which the legend labels
# 'upto 2002' and '2003 onwards'; reshaped to long form for seaborn.
era_labels = ['upto 2002', '2003 onwards']
req = (
    t_cr.pivot_table(index='Day_of_tournament', columns=['Season'], values='WTeamID', aggfunc='count')
    .loc[:, [1998, 2021]]
    # Plain list of labels: the columns lose their 'Season' name, so melt() below
    # names its label column 'variable'.
    .set_axis(era_labels, axis=1)
    .reset_index()
    .melt(id_vars=['Day_of_tournament'], value_vars=era_labels)
)
req['Day_of_tournament'] = req['Day_of_tournament'].astype('int')

sns.barplot(data=req, x='Day_of_tournament', y='value', hue='variable', ax=ax0, palette=[color[1], color[2]])
sns.move_legend(ax0, "upper center", bbox_to_anchor=(0.3, 1.15), ncol=3, title=None, frameon=False)

In [None]:
# --- Top 10 teams by number of tournament seasons played --------------------------

# Stack winners and losers so a team is counted for a season whether it won or lost.
# .rename() returns new frames instead of assigning .columns on a column selection.
tab1 = t_cr[['Season', 'WTeamID']].rename(columns={'Season': 'season', 'WTeamID': 'team'})
tab2 = t_cr[['Season', 'LTeamID']].rename(columns={'Season': 'season', 'LTeamID': 'team'})
ch1 = pd.concat([tab1, tab2], axis=0)

# Distinct seasons per team, highest first; merge() with `teams` (loaded in an earlier
# cell) joins on the shared column(s) to bring in TeamName. Keep the first ten rows.
req = ch1.groupby('team')['season'].nunique().sort_values(ascending=False).reset_index()
req.columns= ['TeamID', 'Tour_count']
req = req.merge(teams).head(10)

bg_color = color[0]
fig = plt.figure(figsize = (7,3), dpi=150)
fig.patch.set_facecolor(bg_color)
gs = fig.add_gridspec(1,1)
gs.update(wspace=0.05, hspace=0.07)
ax0 = fig.add_subplot(gs[0,0])
ax0.patch.set_facecolor(color[0])

# Title and caption in data coordinates (fixed strings).
ax0.text(-0.1, 30, "Top 10 Team played tournaments", fontsize=20, fontfamily='DejaVu Sans', color=color[3])
ax0.text(0.1, 25, "Three team played in all 23 tournaments", fontsize=8, fontfamily='DejaVu Sans', color=color[4])

# A single bar colour is passed through `color=`; a one-entry `palette` without `hue`
# is deprecated in recent seaborn releases and triggers warnings.
sns.barplot(data=req, x='TeamName', y='Tour_count', ax=ax0, color=color[1])
# Highlight the bars whose left edge lies left of x = 2.
for bar in ax0.patches:
    if bar.get_x() < 2:
        bar.set_color(color[4])
plt.xticks(rotation=45)
ax0.set_ylabel("")
# plt.show() instead of fig.show(): Figure.show() warns when the notebook runs on a
# non-GUI backend, and plt.show() matches the other plotting cells.
plt.show()

 <p style="background-color:#F08E3B;font-family:newtimeroman;color:#5E2533;font-size:200%;text-align:center;border-radius:10px 10px;">
    SampleSubmissionStage1</p>

In [None]:
# --- Stage 1 sample submission: load it, then show its shape and first two rows ---
ss_s1 = pd.read_csv(
    "../input/womens-march-mania-2022/WDataFiles_Stage1/WSampleSubmissionStage1.csv",
    encoding="utf-8",
)
print(ss_s1.shape)
display(ss_s1.head(2))

More to come!!!