In [None]:
'''Import basic modules.'''
import pandas as pd
import numpy as np

'''visualization Tools'''
import altair as alt
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
import seaborn as sns
sns.set_style("whitegrid")
%matplotlib inline

'''Bokeh (interactive visualization)'''
from bokeh.plotting import figure, show,output_notebook, ColumnDataSource
from bokeh.layouts import row
from bokeh.models import LabelSet
output_notebook() 

'''Plotly visualization .'''
import plotly.offline as py
from plotly.offline import iplot, init_notebook_mode
import plotly.graph_objs as go
py.init_notebook_mode(connected = True)

# NCAA® March Madness: Exploratory Analysis
![](https://adeshpande3.github.io/assets/Cover8th.png)

# About March Madness

The NCAA college basketball tournament is a single-elimination tournament that features 68 teams vying to survive three weekends of games to be crowned the national champions. The field used to be 64 teams, but the NCAA has recently added four more teams that play "play in" games to earn a spot in the final field of 64.

Only 16 teams (the "Sweet Sixteen") make it past the first weekend. The second weekend narrows the field first to the "Elite Eight" and then the "Final Four". The final weekend focuses on the four semifinalists. The two semifinal victors move on to play in the national championship game.

Ohio State University coach Harold Olsen is usually credited with developing the idea for the tournament in 1939 with the help of the National Association of Basketball Coaches.

The 68 teams in the tournament include 32 teams that receive automatic bids for winning their respective conferences. The remaining 36 teams are given at-large bids by the NCAA selection committee based upon their performance during the season.

Once the field is set, the teams are divided into four regions (usually spread geographically through the eastern, western, midwestern, and southern U.S.) and placed into a bracket that lays out the path a team must take to reach the finals. Each team is seeded or ranked within its region, from 1 to 16.

Higher-seeded teams generally play lower-seeded teams in the beginning. For example, in the first round, each team seeded #1 plays the team seeded #16. This trend continues until upsets begin to occur, at which time brackets can become hard to predict as unexpectedly-good teams (often called "Cinderella" teams) make a run in the tournament.
[**Source**](https://www.wonderopolis.org/wonder/what-is-march-madness).
**For better understanding about March Madness visit @parulpandey** [Kernel](https://www.kaggle.com/parulpandey/decoding-march-madness/#data)
![](https://media0.giphy.com/media/Jve9N0AwwXiiQ/source.gif)

# Introduction
Although I am not a huge college basketball fan, I find this particular data set fascinating in its richness. Let’s dig in and see what we can learn about the history of March Madness. The analysis below covers both regular-season and tournament statistics. It begins with a high-level view of the NCAA championships - who has won them and how. Hopefully, along the way, I’ll discover something useful for you to use in your models.

# Data Section 1 - The Basics
This section provides exploratory data analysis (EDA) of: 
 * Team ID's and Team Names
 * Historical tournament seeds
 * Final scores of historical regular season, conference tournament, and NCAA® tournament games
 * Season-level details including dates and region names
 
Data Section 1 - The Basics ==> [File descriptions](https://www.kaggle.com/c/march-madness-analytics-2020/data)

In [None]:
# Directory that holds every Stage 1 men's data file read in this cell.
# Kept in one place so the location only has to be changed once.
STAGE1_DIR = '../input/march-madness-analytics-2020/2020DataFiles/2020-Mens-Data/MDataFiles_Stage1/'

mteams = pd.read_csv(STAGE1_DIR + 'MTeams.csv')                                   # TeamID -> TeamName lookup
mseasons = pd.read_csv(STAGE1_DIR + 'MSeasons.csv')
mtourney_seed = pd.read_csv(STAGE1_DIR + 'MNCAATourneySeeds.csv')                 # Season, TeamID, Seed
mseason_results = pd.read_csv(STAGE1_DIR + 'MRegularSeasonCompactResults.csv')
mtourney_results = pd.read_csv(STAGE1_DIR + 'MNCAATourneyCompactResults.csv')     # one row per tournament game
conference = pd.read_csv(STAGE1_DIR + 'Conferences.csv')                          # ConfAbbrev -> Description
team_conference = pd.read_csv(STAGE1_DIR + 'MTeamConferences.csv')                # Season, TeamID, ConfAbbrev

## Which Teams Have Won and Lost the Most Tournament Games?

In [None]:
# --- Data preparation -------------------------------------------------------
# Attach team names to every tournament game, once from the winner's side and
# once from the loser's side. `win_team` and `lost_team` are reused by later cells.
wteam = mtourney_results.rename(columns={'WTeamID': 'TeamID'})  # rename so it can be merged with mteams
win_team = wteam.merge(mteams, on='TeamID')

lteam = mtourney_results.rename(columns={'LTeamID': 'TeamID'})
lost_team = lteam.merge(mteams, on='TeamID')
lost_team = lost_team.rename(columns={'TeamID': 'LTeamID_X'})

# --- Charts -----------------------------------------------------------------
TOP_N = 15  # number of teams shown in each chart

# (games, chart title, y-axis label, bar colour) for each chart
bar_charts = [
    (win_team, "Most Tournament Wins since 1985", 'Winners', 'blue'),
    (lost_team, "Most Tournament Lost since 1985", 'Runner-Up', 'orange'),
]

for games, chart_title, axis_label, bar_color in bar_charts:
    # Name both output columns explicitly: the labels that a bare
    # value_counts().reset_index() produces differ between pandas versions.
    team_counts = (
        games['TeamName']
        .value_counts()
        .head(TOP_N)
        .rename_axis('Team')
        .reset_index(name='Games')
    )

    # Create ColumnDataSource from data frame
    source = ColumnDataSource(team_counts)

    p = figure(
        y_range=team_counts['Team'].tolist(),  # categorical axis, most frequent team first
        plot_width=600,
        plot_height=320,
        title=chart_title,
        y_axis_label=axis_label,
        tools=""
    )
    p.title.text_font = 'helvetica'
    p.title.text_font_size = '12pt'
    p.title.text_font_style = "bold"

    p.hbar(
        y='Team',
        right='Games',
        height=0.8,
        color=bar_color,
        line_color="black",
        line_width=1,
        fill_alpha=0.7,
        source=source
    )
    show(p)

## Which Teams Have Won and Lost the Most Championship Games?

* DayNum=154 (Mon) - Round 6, otherwise known as "national final" or "national championship", to bring the tournament field from 2 teams to 1 champion team

In [None]:
CHAMPIONSHIP_DAYNUM = 154  # DayNum of the national championship game

# (games, chart title, y-axis label, bar colour, plot height) for each chart
championship_charts = [
    (win_team, "Tournament Championship Wins since 1985", 'Winners', 'blue', 320),
    (lost_team, "Tournament Championship Lost since 1985", 'Runner-Up', 'orange', 300),
]

for games, chart_title, axis_label, bar_color, chart_height in championship_charts:
    # Keep only the championship game of each season and count appearances per team.
    # Both output columns are named explicitly because the labels that a bare
    # value_counts().reset_index() produces differ between pandas versions.
    final_counts = (
        games.loc[games['DayNum'] == CHAMPIONSHIP_DAYNUM, 'TeamName']
        .value_counts()
        .rename_axis('Team')
        .reset_index(name='Games')
    )

    # Create ColumnDataSource from data frame
    source = ColumnDataSource(final_counts)

    p = figure(
        y_range=final_counts['Team'].tolist(),  # categorical axis, most frequent team first
        plot_width=600,
        plot_height=chart_height,
        title=chart_title,
        y_axis_label=axis_label,
        tools=""
    )
    p.title.text_font = 'helvetica'
    p.title.text_font_size = '12pt'
    p.title.text_font_style = "bold"

    p.hbar(
        y='Team',
        right='Games',
        height=0.8,
        color=bar_color,
        line_color="black",
        line_width=1,
        fill_alpha=0.7,
        source=source
    )
    show(p)

The major programs certainly fill out the top of the list when it comes to championship games, and none more so than Duke. Duke has won 97 tournament games and lost 29. **Duke** has also won 5 championships and finished runner-up 5 times, which makes it the most successful program in the tournament so far.

## Which Team Seed And Conference Wins The Most Championships?

In [None]:
# Split the raw seed code into a region letter (first character) and a numeric
# seed (characters 2-3). The guard makes the cell safe to re-run: once 'Seed'
# has been converted to a number there is nothing left to parse, and slicing it
# again would raise.
if not pd.api.types.is_numeric_dtype(mtourney_seed['Seed']):
    seed_codes = mtourney_seed['Seed']
    mtourney_seed['Region'] = seed_codes.str[0]
    mtourney_seed['Seed'] = seed_codes.str[1:3].astype(int)

In [None]:
CHAMPIONSHIP_DAYNUM = 154  # DayNum of the national championship game

# --- Titles by seed ---------------------------------------------------------
# Attach each winner's seed for that season, keep only championship games and
# count titles per seed. Output columns are named explicitly because the labels
# that a bare value_counts().reset_index() produces differ between pandas versions.
seed_win_team = win_team.merge(mtourney_seed, on=['TeamID', 'Season'])
seed_win_camp = (
    seed_win_team.loc[seed_win_team['DayNum'] == CHAMPIONSHIP_DAYNUM, 'Seed']
    .value_counts()
    .rename_axis('Seed')
    .reset_index(name='Titles')
)

seed = list(seed_win_camp['Seed'].astype(str))  # categorical axis needs string factors
count = list(seed_win_camp['Titles'])

# Lollipop chart: one segment plus one marker per seed
dot = figure(title="Seeds With The Most Titles since 1985", tools="", toolbar_location=None,
             y_range=seed, x_range=[0, 25], plot_width=600, plot_height=400)
dot.title.text_font = 'helvetica'
dot.title.text_font_size = '12pt'
dot.title.text_font_style = "bold"

dot.segment(0, seed, count, seed, line_width=3, line_color="green")
dot.circle(count, seed, size=15, fill_color="orange", line_color="green", line_width=3)

show(dot)

# --- Titles by conference ---------------------------------------------------
# Resolve each winner's conference for that season, then count championship
# games won per conference description.
team_conf = team_conference.merge(conference, on='ConfAbbrev')
conf_win_team = win_team.merge(team_conf, on=['TeamID', 'Season'])  # reused by the next cell
conf_win_camp = (
    conf_win_team.loc[conf_win_team['DayNum'] == CHAMPIONSHIP_DAYNUM, 'Description']
    .value_counts()
    .rename_axis('Conference')
    .reset_index(name='Titles')
)
# Text version of the count, used for the in-bar labels
conf_win_camp['TitlesLabel'] = conf_win_camp['Titles'].astype(str)

# Create ColumnDataSource from data frame
source = ColumnDataSource(conf_win_camp)
conf_team_list = conf_win_camp['Conference'].tolist()

p = figure(
    y_range=conf_team_list,
    plot_width=800,
    plot_height=400,
    title="NCAA Championships by Conference since 1985",
    tools=""
)
p.title.text_font = 'helvetica'
p.title.text_font_size = '12pt'
p.title.text_font_style = "bold"

p.hbar(
    y='Conference',
    right='Titles',
    height=0.8,
    color='green',
    line_color="black",
    line_width=1,
    fill_alpha=0.7,
    source=source
)
# Print the title count at the end of each bar (offsets nudge it inside the bar)
labels = LabelSet(y='Conference', x='Titles', text='TitlesLabel', x_offset=-18, y_offset=-5.5,
                  source=source, render_mode='canvas')
p.add_layout(labels)
show(p)

Since the tournament was expanded to 64 teams in 1985, No. 1 seeds have won the tournament 21 times. A No. 2 seed has won the tournament five times, while a No. 3 seed has won four times. Interestingly, a No. 5 seed has not won the tournament in the period analysed - the “seed of death”, perhaps. The **Atlantic Coast Conference** has produced the most champions, winning 11 championships since 1985.

Let's look on the conferences and their teams who contributed in the tournament

In [None]:
CHAMPIONSHIP_DAYNUM = 154  # DayNum of the national championship game

# Championship-game winners only. A new name is used so that `conf_win_team`
# keeps all tournament wins and this cell can be re-run safely.
conf_champions = conf_win_team[conf_win_team['DayNum'] == CHAMPIONSHIP_DAYNUM]

# Rows: conference description, columns: team name, cells: number of titles
titles_by_conf_team = pd.crosstab(conf_champions.Description, conf_champions.TeamName)

sns.set_style("white")
# Size this figure only, instead of changing plt.rcParams for every later plot
fig, ax = plt.subplots(figsize=(8, 8))
sns.heatmap(titles_by_conf_team, cmap="YlGnBu", annot=True, fmt='g', cbar=False, ax=ax)
ax.set_xlabel('Team Name', fontsize=20)
ax.set_ylabel('Conference', fontsize=20)
ax.set_title('Conference Matchups With Teams NCAA Tournament', fontsize=20)
plt.show()

Duke and North Carolina, both members of the Atlantic Coast Conference (ACC), have contributed the most championships.

# Data Section 2 - Team Box Scores
This section provides game-by-game stats at a team level (free throws attempted, defensive rebounds, turnovers, etc.) for all regular season, conference tournament, and NCAA® tournament games since the 2002-03 season.

Data Section 2 - Team Box Scores ==> [File descriptions](https://www.kaggle.com/c/march-madness-analytics-2020/data)


In [None]:
# Directory that holds the Stage 1 men's data files read in this cell.
STAGE1_DIR = '../input/march-madness-analytics-2020/2020DataFiles/2020-Mens-Data/MDataFiles_Stage1/'

# Game-by-game box scores for NCAA tournament games and regular-season games
tour_results = pd.read_csv(STAGE1_DIR + 'MNCAATourneyDetailedResults.csv')
season_results = pd.read_csv(STAGE1_DIR + 'MRegularSeasonDetailedResults.csv')
season_results.head()

## Indicators of Regular Season Success
Let’s now turn to the regular season game statistics. We are interested in knowing how certain statistics correlate with winning vs losing. We take the regular season detail and add some additional game statistics for both the winning and the losing team of every game. These include shooting percentages, offensive/defensive rebounding efficiency, and possessions. The feature-engineering code is adapted from Laksan Nathan’s [kernel here](https://www.kaggle.com/lnatml/feature-engineering-with-advanced-stats).
* **For More Info visit [stats.nba.com](https://stats.nba.com/help/glossary/)**

In [None]:
# All metrics below are computed with whole-column arithmetic instead of a
# row-by-row DataFrame.apply, which evaluates a Python lambda once per game.
sr = season_results  # alias only; every new column is written to season_results itself

# Points of the winning/losing team rebuilt from the box score: each made field
# goal counts 2, a made three-pointer adds 1 more, each made free throw adds 1.
sr['WPts'] = 2 * sr.WFGM + sr.WFGM3 + sr.WFTM
sr['LPts'] = 2 * sr.LFGM + sr.LFGM3 + sr.LFTM

# Estimated possessions of the winning/losing team
wPos = 0.96 * (sr.WFGA + sr.WTO + 0.44 * sr.WFTA - sr.WOR)
lPos = 0.96 * (sr.LFGA + sr.LTO + 0.44 * sr.LFTA - sr.LOR)
# Two teams use almost the same number of possessions in a game
# (plus/minus one or two - depending on how periods end),
# so just take the average.
sr['Pos'] = (wPos + lPos) / 2

# --- Advanced metrics -------------------------------------------------------
# Offensive efficiency (OffRtg) = 100 x (Points / Possessions)
sr['WOffRtg'] = 100 * (sr.WPts / sr.Pos)
sr['LOffRtg'] = 100 * (sr.LPts / sr.Pos)

# Defensive efficiency (DefRtg) = 100 x (Opponent points / Opponent possessions),
# i.e. the opponent's offensive rating, since both sides share 'Pos'.
sr['WDefRtg'] = sr.LOffRtg
sr['LDefRtg'] = sr.WOffRtg

# Net Rating = Off.Rtg - Def.Rtg
sr['WNetRtg'] = sr.WOffRtg - sr.WDefRtg
sr['LNetRtg'] = sr.LOffRtg - sr.LDefRtg

# Shared denominator of the assist and turnover ratios: FGA + 0.44*FTA + AST + TO
w_usage = sr.WFGA + 0.44 * sr.WFTA + sr.WAst + sr.WTO
l_usage = sr.LFGA + 0.44 * sr.LFTA + sr.LAst + sr.LTO

# Assist Ratio: percentage of team possessions that end in assists
sr['WAstR'] = 100 * sr.WAst / w_usage
sr['LAstR'] = 100 * sr.LAst / l_usage

# Turnover Ratio: number of turnovers of a team per 100 possessions used
# (TO * 100) / (FGA + (FTA * 0.44) + AST + TO)
sr['WTOR'] = 100 * sr.WTO / w_usage
sr['LTOR'] = 100 * sr.LTO / l_usage

# True Shooting Percentage: shooting efficiency that accounts for field goals
# and free throws = 100 * Pts / (2 * (FGA + 0.44 * FTA))
sr['WTSP'] = 100 * sr.WPts / (2 * (sr.WFGA + 0.44 * sr.WFTA))
sr['LTSP'] = 100 * sr.LPts / (2 * (sr.LFGA + 0.44 * sr.LFTA))

# eFG%: Effective Field Goal Percentage, adjusting for the fact that 3pt shots
# are more valuable. NOTE: stored as a 0-1 fraction, unlike TSP which is 0-100.
sr['WeFGP'] = (sr.WFGM + 0.5 * sr.WFGM3) / sr.WFGA
sr['LeFGP'] = (sr.LFGM + 0.5 * sr.LFGM3) / sr.LFGA

# FTA Rate: how good a team is at drawing fouls
sr['WFTAR'] = sr.WFTA / sr.WFGA
sr['LFTAR'] = sr.LFTA / sr.LFGA

# OREB%: share of available offensive rebounds (own OR vs opponent DR)
sr['WORP'] = sr.WOR / (sr.WOR + sr.LDR)
sr['LORP'] = sr.LOR / (sr.LOR + sr.WDR)

# DREB%: share of available defensive rebounds (own DR vs opponent OR)
sr['WDRP'] = sr.WDR / (sr.WDR + sr.LOR)
sr['LDRP'] = sr.LDR / (sr.LDR + sr.WOR)

# REB%: share of all rebounds in the game
total_rebounds = sr.WDR + sr.WOR + sr.LDR + sr.LOR
sr['WRP'] = (sr.WDR + sr.WOR) / total_rebounds
sr['LRP'] = (sr.LDR + sr.LOR) / total_rebounds

### Distribution of Statistics for Winning and Losing teams.
Now let’s take a look at the distributions of these statistics for winning and losing teams.

In [None]:
# Serif font shared by the figure titles in this and the following cells
font = FontProperties()
font.set_family('serif')
sns.set_style("whitegrid")

# One entry per panel: (winner values, loser values, x-axis label, panel title).
# Panels are laid out row by row on the 5x2 grid.
kde_panels = [
    (season_results['WPts'], season_results['LPts'], 'Points', 'Macth Points'),
    (wPos, lPos, 'Possesion Points', 'Team Possesion'),
    (season_results['WOffRtg'], season_results['LOffRtg'], 'Offensive efficiency', 'Offensive Efficiency'),
    (season_results['WDefRtg'], season_results['LDefRtg'], 'Defensive efficiency', 'Defensive Efficiency'),
    (season_results['WAstR'], season_results['LAstR'], 'Assist', 'Assist Ratio'),
    (season_results['WTOR'], season_results['LTOR'], 'Turnover', 'Turnover Ratio'),
    (season_results['WTSP'], season_results['LTSP'], 'Shooting', 'Goal Shooting Percentage'),
    # WeFGP/LeFGP hold the effective field goal percentage, not a 3pt percentage
    (season_results['WeFGP'], season_results['LeFGP'], 'Effective FG %', 'Effective Field Goal Percentage'),
    (season_results['WORP'], season_results['LORP'], 'Offensive %', 'Offensive Rebounding  Efficiency'),
    (season_results['WDRP'], season_results['LDRP'], 'Defensive %', 'Defensive Rebounding Efficiency'),
]

f, axes = plt.subplots(5, 2, figsize=(10, 20))

for ax, (won, lost, x_label, panel_title) in zip(axes.flat, kde_panels):
    sns.kdeplot(won, shade=True, ax=ax, label="W", color='k')
    sns.kdeplot(lost, shade=True, ax=ax, label="L", color='m')
    ax.set(xlabel=x_label)
    # Lower-case keyword: Matplotlib no longer accepts mixed-case property names
    ax.set_title(panel_title, loc='left', fontsize=15, fontproperties=font, fontweight='bold')
    # Build the legend explicitly; not every seaborn version adds one for `label=`
    ax.legend(fontsize=10)

plt.subplots_adjust(wspace=0.4)
plt.subplots_adjust(hspace=0.4)
plt.show()

Unsurprisingly, we see that winning teams tend to have a higher mean (or a lower one in the case of turnovers) in pretty much every metric. Possessions are the exception: the two teams use almost the same number of possessions in a game.

We don’t have final game statistics until we have the game result, so we obviously can’t use these statistics in this form to predict the winners of tournament matchups. However, we can use regular season aggregate statistics to know the Correlation between Absolute Variable And Relative (statistics) Variables and to predict the winner in tournament matchups. Let’s take a look at that next. 

### Correlation between Absolute Variable And Relative Variables of Winning and Losing teams.
Now let’s take a look at the Correlation between Absolute Variable And Relative (statistics) Variables of Winning and Losing teams. 

In [None]:
sns.set_style("white")

# One entry per heatmap: (title, columns of season_results to correlate)
corr_panels = [
    ("Winning Absolute Variables",
     ['WScore', 'WFGM', 'WFGA', 'WFGM3', 'WFGA3', 'WFTM',
      'WFTA', 'WOR', 'WDR', 'WAst', 'WTO', 'WStl', 'WBlk', 'WPF']),
    ("Winning Relative Variables",
     ['WPts', 'Pos', 'WOffRtg', 'WDefRtg', 'WNetRtg', 'WAstR',
      'WTOR', 'WTSP', 'WeFGP', 'WFTAR', 'WORP', 'WDRP', 'WRP']),
    ("Losing Absolute Variables",
     ['LScore', 'LFGM', 'LFGA', 'LFGM3', 'LFGA3', 'LFTM',
      'LFTA', 'LOR', 'LDR', 'LAst', 'LTO', 'LStl', 'LBlk', 'LPF']),
    ("Losing Relative Variables",
     ['LPts', 'Pos', 'LOffRtg', 'LDefRtg', 'LNetRtg', 'LAstR',
      'LTOR', 'LTSP', 'LeFGP', 'LFTAR', 'LORP', 'LDRP', 'LRP']),
]

f, axes = plt.subplots(1, 4, figsize=(12, 12))

for ax, (panel_title, columns) in zip(axes, corr_panels):
    corr = season_results[columns].corr()

    # Hide the upper triangle (diagonal included): the matrix is symmetric.
    # Built-in `bool` is used because the `np.bool` alias no longer exists in NumPy.
    mask = np.zeros_like(corr, dtype=bool)
    mask[np.triu_indices_from(mask)] = True

    sns.heatmap(corr, mask=mask, cmap="RdGy", linewidths=.5,
                vmin=0, vmax=1, ax=ax, cbar=False, square=True)
    ax.tick_params(axis='both', which='major', labelsize=8)
    # `font` is the serif FontProperties object created in the distribution-plot cell.
    # Lower-case keyword: Matplotlib no longer accepts mixed-case property names.
    ax.set_title(panel_title, fontsize=8, fontproperties=font, fontweight='bold')

plt.subplots_adjust(hspace=1)
plt.show()

In the above plot, we can see which variables are more correlated with the winning score, winning points, losing score and losing points. For example, the winning score is more correlated with WTO (turnovers committed) and WStl (steals).
**Note:** Darker colors show more correlation and lighter colors show less correlation.

### Relationship between Tournament Wins(tW) And Regular Season Wins(rsW)
One natural predictor of how deep a team goes in the tournament would be its regular season wins. Let’s see how regular season wins relate to tournament progress each year.
**Note:** The tournament "play-in" games are on days 134/135, Selection Sunday is on day 132, the final day of the regular season is also day 132, and so on.


In [None]:
# Wins per (Season, team): regular season (rsW) and NCAA tournament (tW).
# `win_t` is reused by later cells.
win_s = season_results.groupby(['Season', 'WTeamID'])['WTeamID'].count().to_frame('rsW')
win_t = tour_results.groupby(['Season', 'WTeamID'])['WTeamID'].count().to_frame('tW')

# Inner merge: only teams with at least one win in both are kept.
# Stored under its own name so the `win_team` frame built from the compact
# tournament results (used by the earlier charts) is not overwritten.
season_tourney_wins = win_s.merge(win_t, on=['Season', 'WTeamID']).reset_index()

sns.set_style("whitegrid")
# lmplot returns a FacetGrid: one regression panel per season, four per row
grid = sns.lmplot(x='rsW', y='tW', data=season_tourney_wins, col='Season', col_wrap=4,
                  line_kws={'color': 'black'})
plt.show()

In nearly every year, tournament wins are positively correlated with regular season wins. Let’s do something similar to see if a team's seed is associated with better tournament progress.

In [None]:
# One row per (Season, team): the team's seed and its number of tournament wins.
# Seeds are joined straight to the win counts. Going through the game-level
# regular-season table would repeat every team once per regular-season win,
# which weights the regression by regular-season wins and overplots the scatter.
# Stored under its own name so the `seed_win_team` frame from the seed/title
# chart is not overwritten.
seed_tourney_wins = (
    mtourney_seed[['Season', 'TeamID', 'Seed']]
    .rename(columns={'TeamID': 'WTeamID'})
    .merge(win_t.reset_index(), on=['Season', 'WTeamID'])  # inner join: teams with >= 1 tournament win
)

np.random.seed(42)  # makes the jitter (and the bootstrapped band) reproducible across runs

sns.set_style("whitegrid")
plt.figure(figsize=(15, 10))
# Jitter only moves the drawn points apart; it is applied after the line is fitted
ax = sns.regplot(x='Seed', y='tW', marker='o', data=seed_tourney_wins,
                 x_jitter=0.2, y_jitter=0.2, line_kws={'color': 'black'})
# `font` is the serif FontProperties object created in the distribution-plot cell.
# Lower-case keyword: Matplotlib no longer accepts mixed-case property names.
ax.set_title('Tournament Wins by Seed', loc='left', fontsize=30, fontproperties=font, fontweight='bold')
ax.set_xlabel('Seed', fontsize=12)
ax.set_ylabel('Tournament Wins', fontsize=12)
plt.show()

I’ve introduced some jitter to this plot to avoid overplotting. It exhibits a strong negative relationship between seed and tournament progress - the lower a team’s seed number, the deeper it goes into the tournament (as measured by tournament wins). We see that a 16 seed has never made it past the first round of the tournament. From the plot we can also determine that the lowest seed to ever win the tournament was a number 8.

In [None]:
# Regular-season games won by a team, with that team's tournament win count (tW)
# for the same season attached.
tw_season_results = season_results.merge(win_t, on=['Season', 'WTeamID']).reset_index()
# Keep games of teams that won at least four tournament games that season
deep_run_games = tw_season_results[tw_season_results['tW'] >= 4]

# One entry per figure: (y column, marginal colour, figure title, y-axis label)
joint_panels = [
    ('WFTA', 'steelblue', 'Regular Season Shooting Performance of Tournament Teams', 'Free Throws Attempted'),
    ('WAst', 'darkred', 'Regular Season Assist Performance of Tournament Teams', 'Assist'),
    ('WStl', 'darkgreen', 'Regular Season Steals Performance of Tournament Teams', 'Steal'),
]

for y_col, hist_color, fig_title, y_label in joint_panels:
    grid = sns.jointplot(x="WFGM", y=y_col, data=deep_run_games,
                         kind="scatter", color='k', s=10, height=6)
    # Draw coloured histograms on the marginal axes
    grid.ax_marg_x.hist(deep_run_games['WFGM'], color=hist_color, alpha=.6)
    grid.ax_marg_y.hist(deep_run_games[y_col], color=hist_color, alpha=.6,
                        orientation="horizontal")
    # `font` is the serif FontProperties object created in the distribution-plot cell.
    # Lower-case keyword: Matplotlib no longer accepts mixed-case property names.
    grid.fig.suptitle(fig_title, fontsize=20, fontproperties=font, fontweight='bold')
    grid.set_axis_labels('Field Goals', y_label, fontsize=12)
    grid.fig.subplots_adjust(top=0.9)  # leave room for the suptitle

Interestingly in terms of shooting and stealing, there does not seem to be much of a difference between teams that make the Final Four and the rest of the tournament field in terms of their regular season performance; however it is hard to tell from this plot type. 

## <font color='lime'>Please give me your feedback, and if you find my kernel helpful, an UPVOTE will be appreciated</font>