## Applied ML for baseball:

#### By Thomas Maxence Franco 
Submitted to the Faculty of Science in partial fulfillment of the requirements for the degree of 
#### Master of Modeling for Science and Engineering 
at the 
#### UNIVERSITAT AUTÒNOMA DE BARCELONA 
Directed by 
Tomás Manuel Margalef Burrull
July 2024


In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import KFold
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from itertools import combinations

In [2]:
# Load the dataset from CSV into `df`.
# NOTE(review): this is an absolute, machine-specific Windows path; it must be
# changed to run the notebook on any other machine.
file_path = "C:\\Users\\mfran\\OneDrive - UAB\\Masters\\Thesis\\v2\\tables\\finalbatdf.csv"
df = pd.read_csv(file_path)


In [3]:
# Preview the first rows of the raw table to sanity-check the load.
df.head()

Unnamed: 0,Name,contract_years,salary,AAV,minor league,catcher,date,new_team,former_team,Age,...,SB_3,BB%_3,K%_3,AVG_3,OBP_3,SLG_3,GDP_3,CS_3,H_3,OPS_3
0,Randal Grichuk,1,2.0,2.0,0,0,2024,ARI,LAA,32,...,0.0,0.049541,0.209174,0.240704,0.280734,0.422701,17.0,3.0,123.0,0.703435
1,Joc Pederson,1,12.5,12.5,0,0,2024,ARI,SFG,32,...,2.0,0.081081,0.243243,0.237762,0.309771,0.421911,9.0,3.0,102.0,0.731683
2,Kevin Newman,1,0.946237,0.946237,0,0,2024,ARI,CIN,30,...,6.0,0.048736,0.074007,0.226306,0.264599,0.309478,7.0,1.0,117.0,0.574076
3,Tucker Barnhart,2,6.5,3.25,0,1,2024,ARI,LAD,33,...,0.0,0.074742,0.257732,0.247126,0.31701,0.367816,8.0,0.0,86.0,0.684826
4,Lourdes Gurriel Jr.,3,42.0,14.0,0,0,2024,ARI,ARI,30,...,1.0,0.05915,0.18854,0.276,0.318519,0.466,8.0,3.0,138.0,0.784519


In [4]:
# Inspect column dtypes and non-null counts to locate missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 276 entries, 0 to 275
Data columns (total 61 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Name              276 non-null    object 
 1   contract_years    276 non-null    int64  
 2   salary            276 non-null    float64
 3   AAV               276 non-null    float64
 4   minor league      276 non-null    int64  
 5   catcher           276 non-null    int64  
 6   date              276 non-null    int64  
 7   new_team          276 non-null    object 
 8   former_team       276 non-null    object 
 9   Age               276 non-null    int64  
 10  WAR3              276 non-null    float64
 11  Yrs               276 non-null    int64  
 12  career_games      276 non-null    int64  
 13  stayed_same_team  276 non-null    int64  
 14  G                 270 non-null    float64
 15  PA                270 non-null    float64
 16  HR                270 non-null    float64
 1

In [5]:
# Give two columns identifier-friendly names; `df` itself is left untouched
# because rename() returns a new frame.
column_renames = {'minor league': 'min_league', 'date': 'year'}
df2 = df.rename(columns=column_renames)
df2.head()

Unnamed: 0,Name,contract_years,salary,AAV,min_league,catcher,year,new_team,former_team,Age,...,SB_3,BB%_3,K%_3,AVG_3,OBP_3,SLG_3,GDP_3,CS_3,H_3,OPS_3
0,Randal Grichuk,1,2.0,2.0,0,0,2024,ARI,LAA,32,...,0.0,0.049541,0.209174,0.240704,0.280734,0.422701,17.0,3.0,123.0,0.703435
1,Joc Pederson,1,12.5,12.5,0,0,2024,ARI,SFG,32,...,2.0,0.081081,0.243243,0.237762,0.309771,0.421911,9.0,3.0,102.0,0.731683
2,Kevin Newman,1,0.946237,0.946237,0,0,2024,ARI,CIN,30,...,6.0,0.048736,0.074007,0.226306,0.264599,0.309478,7.0,1.0,117.0,0.574076
3,Tucker Barnhart,2,6.5,3.25,0,1,2024,ARI,LAD,33,...,0.0,0.074742,0.257732,0.247126,0.31701,0.367816,8.0,0.0,86.0,0.684826
4,Lourdes Gurriel Jr.,3,42.0,14.0,0,0,2024,ARI,ARI,30,...,1.0,0.05915,0.18854,0.276,0.318519,0.466,8.0,3.0,138.0,0.784519


The columns without a '_2' or '_3' suffix hold the statistics from the last season played up to that date (which could be 2024, 2023 or 2022). These columns have fewer missing values than the ones ending in _2 or _3, so I chose to fill the missing _2 and _3 values with the last-season values in order not to lose data. This is still reasonable practice, as we are not making up data, just extending a year of performance by that same player to the seasons that are missing.

In [6]:
# Back-fill missing prior-season statistics: wherever a '<stat>_2' or
# '<stat>_3' value is missing, reuse the value from the un-suffixed '<stat>'
# column of the same row.
season_stats = ['G', 'PA', 'HR', 'R', 'RBI', 'SB', 'BB%', 'K%', 'AVG', 'OBP', 'SLG', 'GDP', 'CS', 'H', 'OPS']

for season_suffix in ('_2', '_3'):
    for col_suffix in season_stats:
        older_col = f'{col_suffix}{season_suffix}'
        # Assign the filled Series back to the frame. Calling
        # fillna(inplace=True) on `df2[older_col]` operates on an intermediate
        # object: pandas 2.x emits a FutureWarning for it, and with
        # Copy-on-Write enabled the original frame is never updated.
        df2[older_col] = df2[older_col].fillna(df2[col_suffix])


In [7]:
# Re-check non-null counts after back-filling the '_2' / '_3' columns.
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 276 entries, 0 to 275
Data columns (total 61 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Name              276 non-null    object 
 1   contract_years    276 non-null    int64  
 2   salary            276 non-null    float64
 3   AAV               276 non-null    float64
 4   min_league        276 non-null    int64  
 5   catcher           276 non-null    int64  
 6   year              276 non-null    int64  
 7   new_team          276 non-null    object 
 8   former_team       276 non-null    object 
 9   Age               276 non-null    int64  
 10  WAR3              276 non-null    float64
 11  Yrs               276 non-null    int64  
 12  career_games      276 non-null    int64  
 13  stayed_same_team  276 non-null    int64  
 14  G                 270 non-null    float64
 15  PA                270 non-null    float64
 16  HR                270 non-null    float64
 1

Six rows still have missing values. I will drop them, as we don't have any performance data for those players and removing them does not meaningfully affect the size of our dataframe.

In [8]:
# Remove, in place, every row that still contains at least one missing value.
df2.dropna(axis=0, how='any', inplace=True)

In [9]:
# Confirm that no missing values remain after dropping incomplete rows.
df2.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 270 entries, 0 to 275
Data columns (total 61 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Name              270 non-null    object 
 1   contract_years    270 non-null    int64  
 2   salary            270 non-null    float64
 3   AAV               270 non-null    float64
 4   min_league        270 non-null    int64  
 5   catcher           270 non-null    int64  
 6   year              270 non-null    int64  
 7   new_team          270 non-null    object 
 8   former_team       270 non-null    object 
 9   Age               270 non-null    int64  
 10  WAR3              270 non-null    float64
 11  Yrs               270 non-null    int64  
 12  career_games      270 non-null    int64  
 13  stayed_same_team  270 non-null    int64  
 14  G                 270 non-null    float64
 15  PA                270 non-null    float64
 16  HR                270 non-null    float64
 1

In [10]:
# Columns to cast from float to int, for the un-suffixed columns and their
# '_2' / '_3' counterparts. The list is generated from the base names so the
# three groups cannot drift apart.
counting_stats = ['G', 'PA', 'HR', 'R', 'RBI', 'SB', 'GDP', 'CS', 'H']
float_to_int_columns = [
    f'{stat}{season_suffix}'
    for season_suffix in ('', '_2', '_3')
    for stat in counting_stats
]

# The cast is done after the NaN rows were dropped; astype(int) raises on NaN.
df2[float_to_int_columns] = df2[float_to_int_columns].astype(int)

df2.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 270 entries, 0 to 275
Data columns (total 61 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Name              270 non-null    object 
 1   contract_years    270 non-null    int64  
 2   salary            270 non-null    float64
 3   AAV               270 non-null    float64
 4   min_league        270 non-null    int64  
 5   catcher           270 non-null    int64  
 6   year              270 non-null    int64  
 7   new_team          270 non-null    object 
 8   former_team       270 non-null    object 
 9   Age               270 non-null    int64  
 10  WAR3              270 non-null    float64
 11  Yrs               270 non-null    int64  
 12  career_games      270 non-null    int64  
 13  stayed_same_team  270 non-null    int64  
 14  G                 270 non-null    int32  
 15  PA                270 non-null    int32  
 16  HR                270 non-null    int32  
 1

In [11]:
# Print a header followed by the descriptive statistics of the numeric columns.
print("\nSummary Statistics:", df2.describe(), sep="\n")


Summary Statistics:
       contract_years     salary         AAV  min_league     catcher  \
count      270.000000  270.00000  270.000000  270.000000  270.000000   
mean         1.588889   19.97166    5.611029    0.359259    0.162963   
std          1.657883   64.00335    8.354012    0.480674    0.370018   
min          1.000000    0.25000    0.250000    0.000000    0.000000   
25%          1.000000    0.72000    0.720000    0.000000    0.000000   
50%          1.000000    1.75000    1.750000    0.000000    0.000000   
75%          1.000000    8.87500    7.000000    1.000000    0.000000   
max         11.000000  700.00000   70.000000    1.000000    1.000000   

              year         Age        WAR3         Yrs  career_games  ...  \
count   270.000000  270.000000  270.000000  270.000000    270.000000  ...   
mean   2023.011111   31.892593    2.665556    8.351852    763.977778  ...   
std       0.783109    2.949315    3.788718    3.126559    453.834198  ...   
min    2022.000000   2

### Interest Rates

In [12]:
# Scale `salary` by a factor chosen from the row's `year`.
# NOTE(review): the original author annotated the 2022 line with "its wrong";
# the factors should be verified before the adjusted values are relied upon.
salary_factor_by_year = {2024: 0.994, 2023: 1.103, 2022: 1.1408}
for contract_year, factor in salary_factor_by_year.items():
    df2.loc[df2['year'] == contract_year, 'salary'] *= factor
df2.head()

Unnamed: 0,Name,contract_years,salary,AAV,min_league,catcher,year,new_team,former_team,Age,...,SB_3,BB%_3,K%_3,AVG_3,OBP_3,SLG_3,GDP_3,CS_3,H_3,OPS_3
0,Randal Grichuk,1,1.988,2.0,0,0,2024,ARI,LAA,32,...,0,0.049541,0.209174,0.240704,0.280734,0.422701,17,3,123,0.703435
1,Joc Pederson,1,12.425,12.5,0,0,2024,ARI,SFG,32,...,2,0.081081,0.243243,0.237762,0.309771,0.421911,9,3,102,0.731683
2,Kevin Newman,1,0.94056,0.946237,0,0,2024,ARI,CIN,30,...,6,0.048736,0.074007,0.226306,0.264599,0.309478,7,1,117,0.574076
3,Tucker Barnhart,2,6.461,3.25,0,1,2024,ARI,LAD,33,...,0,0.074742,0.257732,0.247126,0.31701,0.367816,8,0,86,0.684826
4,Lourdes Gurriel Jr.,3,41.748,14.0,0,0,2024,ARI,ARI,30,...,1,0.05915,0.18854,0.276,0.318519,0.466,8,3,138,0.784519


In [13]:
# Scale `AAV` by a factor chosen from the row's `year`, using the same
# multipliers that are applied to `salary`.
aav_factor_by_year = {2024: 0.994, 2023: 1.103, 2022: 1.1408}
for contract_year, factor in aav_factor_by_year.items():
    df2.loc[df2['year'] == contract_year, 'AAV'] *= factor
df2.head()

Unnamed: 0,Name,contract_years,salary,AAV,min_league,catcher,year,new_team,former_team,Age,...,SB_3,BB%_3,K%_3,AVG_3,OBP_3,SLG_3,GDP_3,CS_3,H_3,OPS_3
0,Randal Grichuk,1,1.988,1.988,0,0,2024,ARI,LAA,32,...,0,0.049541,0.209174,0.240704,0.280734,0.422701,17,3,123,0.703435
1,Joc Pederson,1,12.425,12.425,0,0,2024,ARI,SFG,32,...,2,0.081081,0.243243,0.237762,0.309771,0.421911,9,3,102,0.731683
2,Kevin Newman,1,0.94056,0.94056,0,0,2024,ARI,CIN,30,...,6,0.048736,0.074007,0.226306,0.264599,0.309478,7,1,117,0.574076
3,Tucker Barnhart,2,6.461,3.2305,0,1,2024,ARI,LAD,33,...,0,0.074742,0.257732,0.247126,0.31701,0.367816,8,0,86,0.684826
4,Lourdes Gurriel Jr.,3,41.748,13.916,0,0,2024,ARI,ARI,30,...,1,0.05915,0.18854,0.276,0.318519,0.466,8,3,138,0.784519


In [14]:
# Preview the frame after both `salary` and `AAV` have been rescaled.
df2.head()

Unnamed: 0,Name,contract_years,salary,AAV,min_league,catcher,year,new_team,former_team,Age,...,SB_3,BB%_3,K%_3,AVG_3,OBP_3,SLG_3,GDP_3,CS_3,H_3,OPS_3
0,Randal Grichuk,1,1.988,1.988,0,0,2024,ARI,LAA,32,...,0,0.049541,0.209174,0.240704,0.280734,0.422701,17,3,123,0.703435
1,Joc Pederson,1,12.425,12.425,0,0,2024,ARI,SFG,32,...,2,0.081081,0.243243,0.237762,0.309771,0.421911,9,3,102,0.731683
2,Kevin Newman,1,0.94056,0.94056,0,0,2024,ARI,CIN,30,...,6,0.048736,0.074007,0.226306,0.264599,0.309478,7,1,117,0.574076
3,Tucker Barnhart,2,6.461,3.2305,0,1,2024,ARI,LAD,33,...,0,0.074742,0.257732,0.247126,0.31701,0.367816,8,0,86,0.684826
4,Lourdes Gurriel Jr.,3,41.748,13.916,0,0,2024,ARI,ARI,30,...,1,0.05915,0.18854,0.276,0.318519,0.466,8,3,138,0.784519


Drop salary and contract_years, as we only need AAV. MLBAMID, PlayerId, Name, new_team, former_team and year won't be needed either.

In [17]:
# Columns removed before modelling: the alternative contract-value fields,
# identifier/label columns, and `min_league`. `AAV` is kept in the new frame.
columns_to_drop = [
    'salary', 'contract_years',
    'MLBAMID', 'PlayerId',
    'year', 'Name',
    'new_team', 'former_team',
    'min_league',
]

# drop() returns a new frame, so df2 keeps all of its columns.
df3 = df2.drop(labels=columns_to_drop, axis=1)

df3.head()

Unnamed: 0,AAV,catcher,Age,WAR3,Yrs,career_games,stayed_same_team,G,PA,HR,...,SB_3,BB%_3,K%_3,AVG_3,OBP_3,SLG_3,GDP_3,CS_3,H_3,OPS_3
0,1.988,0,32,2.2,10,1141,0,118,471,16,...,0,0.049541,0.209174,0.240704,0.280734,0.422701,17,3,123,0.703435
1,12.425,0,32,2.1,10,1140,0,121,425,15,...,2,0.081081,0.243243,0.237762,0.309771,0.421911,9,3,102,0.731683
2,0.94056,0,30,1.1,6,505,0,74,253,3,...,6,0.048736,0.074007,0.226306,0.264599,0.309478,7,1,117,0.574076
3,3.2305,1,33,-0.4,10,881,0,47,123,1,...,0,0.074742,0.257732,0.247126,0.31701,0.367816,8,0,86,0.684826
4,13.916,0,30,7.8,6,613,1,145,592,24,...,1,0.05915,0.18854,0.276,0.318519,0.466,8,3,138,0.784519


## Feature Selection

In [18]:
# Pairwise correlations of the numeric columns, rendered as a colour-coded
# table with the colour scale pinned to the full [-1, 1] range.
numeric_corr = df3.select_dtypes(include='number').corr()
numeric_corr.style.background_gradient(cmap="coolwarm", vmin=-1, vmax=1)

Unnamed: 0,AAV,catcher,Age,WAR3,Yrs,career_games,stayed_same_team,G,PA,HR,R,RBI,SB,BB%,K%,AVG,OBP,SLG,GDP,CS,H,OPS,G_2,PA_2,HR_2,R_2,RBI_2,SB_2,BB%_2,K%_2,AVG_2,OBP_2,SLG_2,GDP_2,CS_2,H_2,OPS_2,G_3,PA_3,HR_3,R_3,RBI_3,SB_3,BB%_3,K%_3,AVG_3,OBP_3,SLG_3,GDP_3,CS_3,H_3,OPS_3
AAV,1.0,-0.145862,-0.109508,0.84371,0.074646,0.238871,-0.029755,0.493035,0.594139,0.738762,0.720267,0.684967,0.404414,0.210469,-0.160432,0.295417,0.337224,0.496663,0.426687,0.345175,0.641957,0.456141,0.422035,0.500808,0.586846,0.591167,0.562094,0.205003,0.119858,-0.138755,0.307468,0.311442,0.43441,0.35581,0.293086,0.527646,0.413415,0.341807,0.439178,0.558221,0.539663,0.497699,0.237231,0.168357,-0.077036,0.262069,0.30002,0.425746,0.268472,0.259492,0.441625,0.40457
catcher,-0.145862,1.0,-0.00434,-0.183725,-0.139721,-0.29423,0.092898,-0.315471,-0.296819,-0.216436,-0.313009,-0.289725,-0.246619,0.016242,0.119093,-0.170735,-0.139382,-0.206394,-0.170111,-0.241925,-0.308177,-0.189232,-0.254104,-0.248251,-0.193734,-0.291208,-0.240453,-0.239701,-0.013704,0.203359,-0.235652,-0.203284,-0.226746,-0.112968,-0.223485,-0.277757,-0.231205,-0.243523,-0.244617,-0.182046,-0.29377,-0.218948,-0.237773,-0.00067,0.209346,-0.203888,-0.169896,-0.170843,-0.129681,-0.278804,-0.271362,-0.180817
Age,-0.109508,-0.00434,1.0,0.018564,0.811208,0.673684,-0.225365,0.141622,0.126278,0.068801,0.067463,0.123548,-0.054711,0.027035,-0.093667,0.121424,0.135378,0.136963,0.139135,-0.089586,0.101805,0.143891,0.145451,0.143969,0.121708,0.102208,0.175299,-0.095642,0.143019,-0.102002,0.113389,0.176119,0.174276,0.186253,-0.108597,0.125863,0.185228,0.22095,0.209151,0.200972,0.176699,0.250668,-0.036405,0.103601,-0.108229,0.134639,0.176634,0.211291,0.249212,-0.099837,0.192671,0.211133
WAR3,0.84371,-0.183725,0.018564,1.0,0.141213,0.296051,-0.113085,0.572801,0.653733,0.684946,0.741241,0.687286,0.507097,0.18549,-0.195267,0.328506,0.357962,0.491564,0.470821,0.389716,0.69023,0.461794,0.520361,0.611656,0.597198,0.685523,0.616196,0.337723,0.109464,-0.21709,0.419771,0.406089,0.489759,0.453807,0.369829,0.657394,0.486899,0.421983,0.525912,0.554552,0.609364,0.539402,0.354885,0.134181,-0.12553,0.358072,0.36693,0.465178,0.325745,0.347029,0.551039,0.456639
Yrs,0.074646,-0.139721,0.811208,0.141213,1.0,0.885156,-0.323974,0.305445,0.289024,0.238397,0.245689,0.292058,0.028404,0.007446,-0.089466,0.18351,0.182105,0.251754,0.259113,0.008079,0.266995,0.235984,0.347469,0.335552,0.277448,0.29624,0.338294,0.02117,0.121834,-0.138175,0.186187,0.233208,0.285903,0.293519,0.009808,0.299217,0.282775,0.450014,0.424632,0.36596,0.386963,0.431594,0.08594,0.097047,-0.171966,0.225976,0.253057,0.321896,0.368414,0.063692,0.380704,0.315669
career_games,0.238871,-0.29423,0.673684,0.296051,0.885156,1.0,-0.284195,0.505093,0.500441,0.410081,0.454484,0.49158,0.175742,0.061328,-0.187907,0.222972,0.228798,0.336983,0.430128,0.171511,0.476504,0.309486,0.496119,0.518278,0.42778,0.487711,0.509121,0.130738,0.136141,-0.259841,0.262459,0.300633,0.368836,0.443967,0.126556,0.478339,0.364717,0.57581,0.599791,0.522844,0.579984,0.601613,0.214264,0.132017,-0.273546,0.292014,0.31909,0.390231,0.487445,0.186464,0.558341,0.387276
stayed_same_team,-0.029755,0.092898,-0.225365,-0.113085,-0.323974,-0.284195,1.0,-0.261365,-0.218837,-0.109709,-0.148681,-0.18078,-0.042301,0.004771,-0.132346,-0.000797,-0.005239,-0.071457,-0.173601,-0.07653,-0.186595,-0.047141,-0.384845,-0.35235,-0.275714,-0.280423,-0.308448,-0.151453,-0.131847,0.087379,-0.241593,-0.286135,-0.319452,-0.248267,-0.145541,-0.316895,-0.325635,-0.439592,-0.409273,-0.310352,-0.343853,-0.349973,-0.161912,-0.031294,0.138401,-0.280654,-0.253033,-0.324712,-0.299912,-0.173901,-0.379282,-0.317595
G,0.493035,-0.315471,0.141622,0.572801,0.305445,0.505093,-0.261365,1.0,0.95865,0.706307,0.885585,0.873989,0.456872,0.08334,-0.315141,0.358301,0.349026,0.512125,0.751836,0.469093,0.921983,0.470898,0.615915,0.664809,0.520697,0.651318,0.605738,0.300938,0.039984,-0.340516,0.417897,0.380101,0.467648,0.516649,0.29434,0.665911,0.462019,0.550797,0.610981,0.539536,0.626025,0.584607,0.283452,0.055169,-0.249539,0.368518,0.335977,0.433725,0.430538,0.272171,0.612303,0.423471
PA,0.594139,-0.296819,0.126278,0.653733,0.289024,0.500441,-0.218837,0.95865,1.0,0.772864,0.944584,0.929151,0.455983,0.114702,-0.333313,0.383242,0.378256,0.537526,0.795435,0.461242,0.979315,0.499344,0.61245,0.69171,0.582602,0.697258,0.658937,0.264389,0.076412,-0.32144,0.425243,0.398474,0.490199,0.562676,0.28483,0.703216,0.484315,0.554236,0.646906,0.600025,0.672681,0.641329,0.274639,0.102459,-0.257778,0.396413,0.380796,0.473572,0.477489,0.248262,0.652421,0.467583
HR,0.738762,-0.216436,0.068801,0.684946,0.238397,0.410081,-0.109709,0.706307,0.772864,1.0,0.853299,0.899935,0.314824,0.201029,-0.125318,0.301665,0.339936,0.646097,0.575875,0.307844,0.762261,0.551204,0.48487,0.558298,0.656351,0.592002,0.624008,0.125141,0.157899,-0.114696,0.281849,0.312671,0.480875,0.412863,0.220583,0.5481,0.445486,0.435972,0.522103,0.655035,0.578167,0.585811,0.150647,0.210511,-0.083235,0.257267,0.320391,0.464025,0.36518,0.180269,0.491805,0.438481


In [None]:
# Rank every column by its correlation with the target (AAV).
correlation_matrix = df3.corr()

correlation_with_AAV = correlation_matrix['AAV'].sort_values(ascending=False)

print("Correlation with AAV:")
print(correlation_with_AAV)

# Extracting predictors (X) and target variable (y) from df3
X = df3.drop(columns=['AAV'])  # every column except the target
y = df3['AAV']

# Define the number of folds
k = 10

# Largest subset size to evaluate. The default keeps the exhaustive search
# over every non-empty subset.
# NOTE(review): with p predictors that is 2**p - 1 subsets, each fitted k
# times; lower this value (e.g. to 3 or 4) to get a run that finishes.
max_subset_size = len(X.columns)

# Initialize a k-fold cross-validation splitter. The seed is fixed and the
# folds are materialised once, so every subset is scored on exactly the same
# train/validation partitions and the result is reproducible.
kf = KFold(n_splits=k, shuffle=True, random_state=42)
folds = list(kf.split(X))

# Initialize variables to store the best subset and its corresponding error
best_subset = None
best_error = float('inf')

# Per-size results for the plot below: `predictors` holds the subset sizes and
# `mspr_values` the lowest CV error found among subsets of that size.
predictors = []
mspr_values = []

# Loop through each possible subset of predictors
for r in range(1, max_subset_size + 1):
    best_error_for_size = float('inf')

    for subset in combinations(X.columns, r):
        # Select the candidate columns once per subset, not once per fold.
        X_subset = X[list(subset)]

        cv_errors = []
        for train_index, val_index in folds:
            X_train, X_val = X_subset.iloc[train_index], X_subset.iloc[val_index]
            y_train, y_val = y.iloc[train_index], y.iloc[val_index]

            # Fit model
            model = LinearRegression()
            model.fit(X_train, y_train)

            # Predict on validation set
            y_pred = model.predict(X_val)

            # Calculate Mean Squared Prediction Error
            mse = mean_squared_error(y_val, y_pred)
            cv_errors.append(mse)

        # Average the errors across all folds to get CV error for this subset
        CV_error = np.mean(cv_errors)

        if CV_error < best_error_for_size:
            best_error_for_size = CV_error

        # Update best subset if CV error is lower
        if CV_error < best_error:
            best_error = CV_error
            best_subset = subset

    predictors.append(r)
    mspr_values.append(best_error_for_size)

# best_subset now contains the column names of the best subset of predictors


# Plot MSPR results against the number of variables
plt.plot(predictors, mspr_values, marker='o', linestyle='-')
plt.xlabel('Number of Variables')
plt.ylabel('Mean Squared Prediction Error (MSPR)')
plt.title('MSPR vs. Number of Variables')
plt.grid(True)
plt.show()
