In [None]:
import pandas as pd
import numpy as np
import pandas_profiling
from datetime import datetime
import re
import numbers

from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.linear_model import LinearRegression , Lasso , Ridge, ElasticNet, SGDRegressor
from sklearn.preprocessing import PolynomialFeatures
from sklearn.svm import SVR , LinearSVR
from sklearn import metrics
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.tree import DecisionTreeRegressor
import statsmodels.api as sm
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor, VotingRegressor

%matplotlib inline
import matplotlib.pyplot as plt 
plt.rc("font", size=14)
import seaborn as sns
sns.set(style="white")
sns.set(style="whitegrid", color_codes=True)

import warnings
warnings.filterwarnings("ignore")

In [None]:
# Read the training and test CSV files (paths relative to the working directory)
# and print the (rows, columns) shape of each frame.
org_train = pd.read_csv('Data_Train.csv');
org_test = pd.read_csv('Data_Test.csv');
print('org_train',org_train.shape);
print('org_test',org_test.shape);

In [None]:
org_train.head(5)

In [None]:
org_test.head(5)

In [None]:
#org_test['Views'] = np.nan;

In [None]:
# Stack the train and test rows into one frame so the cleaning and encoding
# steps below are applied to both at once.
# NOTE(review): ignore_index is not set, so each input keeps its own row labels
# and labels may repeat in org_data — confirm before any label-based drop/select.
org_data = pd.concat([org_train,org_test],axis='rows',sort=False)

In [None]:
org_data.shape

In [None]:
org_data.duplicated().sum()

In [None]:
org_data.columns

In [None]:
org_data.tail(10)

In [None]:
#pandas_profiling.ProfileReport(org_data)

In [None]:
org_data.describe().T

In [None]:
org_data.info()

In [None]:
org_data[pd.isnull(org_data['Song_Name'])]

In [None]:
#org_data.drop(org_data.index[31398],inplace=True)

In [None]:
# Normalise the 'all-music' genre label to 'allmusic'.
# The result is assigned back rather than using replace(..., inplace=True) on
# the column selection: that chained in-place form can operate on a temporary
# object (pandas 2.x warns about it, and with Copy-on-Write org_data is left
# unchanged).
org_data['Genre'] = org_data['Genre'].replace({'all-music' : 'allmusic'})

In [None]:
# Distinct Genre values as a plain list; iterated later to print the artist
# names found under each genre.
genList = pd.unique(org_data['Genre']).tolist();

In [None]:
# Map artist names that contain symbols, emoji or non-ASCII text to hand-picked
# ASCII placeholder tokens, before remEmo strips the remaining non-alphanumeric
# characters from every name.
# NOTE(review): this uses replace(..., inplace=True) on a column selection;
# confirm it still updates org_data on the pandas version in use.
org_data['Name'].replace({
  '3' : 'custhree',
  'â˜ ï¸SÊœá´€Ê€á´€X OÒ“Ò“Éªá´„Éªá´€ÊŸâ˜ ï¸' : 'cuselectronic',
  "[DJWiLlY '19]✔" : 'cusdjwilly',
  '☆LiL PEEP☆' : 'cuslilpeepa',
  '★☞ Azteca PDLK ☜★' : 'cusazteca',
  'Ã‘engo Flow Official' : 'cusanengo',
  'Đ.BoomBaa 🐱🐱🐱' : 'cusboombaa',
  '↪ DJ JUNINHO 22' : 'cusjuniho',
  'ä»™æ°´é¢¨æ¥½' : 'cusaeec',
  '༄ Tha Trickaz ☁' : 'custrick',
  'Đạt BoomBaa' : 'cusatboom',
  'Äá»©c Durex' : 'cusdurex',
  'Adictos A Los Corridosâœ…' : 'cusadictos',
  'áƒ¦ ìŠˆë¹„_[à¹‘B T Sà¹‘]' : 'cusafis',
  'AminÃ©' : 'cusamino',
  'Anuel AA ✅' : 'cusAnuel',
  "Ar'mon And Trey" : 'custrey',
  '♤♡♢♧El Fran Rt♤♡♢♧' : 'cusfrance',
  "A'SOUNG" : 'cussoung',
  'Atif Aslam ✪' : 'cusaslam',
  'Bad Bunny â€“ X100Pre (Ãlbum)' : 'cusbadbunny',
  'boppin™' : 'cusboppina',
  'Bean Xinh ❂' : 'cusbeanxin',
  'Bảo Huỳnh' : 'cusbaeohua',
  'Chuoi Tây ✪' : 'cuschuoi',
  "B'Small DJ" : 'cussmalldj',
  'CÅfresi' : 'cusfresh',
  'Chuột Đow' : 'cuschuaw',
  'Cá»‘p Cá»‘p' : 'cuscap',
  'Connor♛' : 'cuscannora',
  'Ð¢Ð¸Ð¼Ð° Ð‘ÐµÐ»Ð¾Ñ€ÑƒÑÑÐºÐ¸Ñ…' : 'cusddd',
  'Dương Đức Cương ✪ 0868425758' : 'cusdaeing',
  'Declan Devine ✪' : 'cusdeclan',
  'CRYJAXX Too 🌐' : 'cuscry',
  'DJ ANDERSON DO PARAÃSO' : 'cusdjandersondo',
  'Disciple ♛ ♜ ♞' : 'cusdiscipline',
  "Deezay Phong House's" : 'cusdeezay',
  "DJ Bướg's" : 'cusdjbae',
  'DJ ALEXIS TUME ☑' : 'cusdjalex',
  'DIVINE ✨' : 'cusdivineae',
  'DJ CABELÃO DO TURANO (BAILE DA AUSTRÁLIA) ®' : 'cusdjcabela',
  'DJ DÅ©ng Pham' : 'cusdjdang',
  'DEFÎ›LT' : 'cusdefilt',
  'DJ DENILSON DO CHAPADÃO 🇪🇬' : 'cusdjdeni',
  'DJ Jesús Sánchez' : 'cusdjjesa',
  'DJ GUSTAVO MIX ®' : 'cusdjgus',
  'DJ ENJOY Official ✪' : 'cusdjenjoy',
  'DJ WJ DA INESTAN | TROPA DO GORDÃƒO'  :'cusdjwjda',
  'DJ VINICIN DO CONCÃ“RDIA' : 'cusdjvinicin',
  'DJ TiLÃ´' : 'cusdjtila',
  'DJ XICLAUDIO 🇮🇶' : 'cusdjxicl',
  'DJ TX Producer ✪' : 'cusdjtx',
  'DJ PENOSO DA CDM 🇨🇮' : 'cusdjpenoso',
  'Drop Central 💧' : 'cusdjcentral',
  "👻 Hi I'm Ghost 👻" : 'cushighost',
  '🌸fatboibari🌸': 'cusfatboi',
  '🍺 Hiruko 🍺' : 'cushiruko',
  'ÊŸá´œá´„á´€ ÊŸá´œsÊœ' : 'cuseya',
  'EDM Vietnam ✅' : 'cusedmvietnam',
  'El Zorillio Tribalerio *Dj Zorra Mix*' : 'cusdjzorra',
  'El Compa Chilo Oficial 🎶🎶' : 'cuselcompa',
  'galaxy music ✪' : 'cusdjmusic',
  'Giật' : 'cusgiat',
  'GUAPOTREY👽' : 'cusguapotrey',
  'Ha Banana ✪' : 'cushabanana',
  'Hybrid Trap 🔥' : 'cushybrid',
  'HEYKERI🌸' : 'cusheyker',
  'HU₵₵I' : 'cushuaua',
  'Jon Z ✅' : 'cusjonz',
  'JadÅ« Dala' : 'cusjada',
  'Jhené Aiko' : 'cusjhena',
  'Jack Ü' : 'cusjackae',
  'ï¼³ï¼¨ï¼¥eï¼³ï¼¨' : 'cusiiiy',
  '태태 Daily' : 'cusififoe',
  'JoÃ£o Sousa' : 'cusjoaeo',
  'k$upreme' : 'cussupreme',
  'Ken77 💎' : 'cusken77',
  'K2N â™¥ K-Pop 1st' : 'cusk2na',
  'KeeBin ✪' : 'cuskeebin',
  'ka$h steezy' : 'cuskash',
  'KNY FÎ›CTORY' : 'cusknyfactory',
  'Khoa Dương' : 'cuskhoa',
  'L2Share♫52' : 'cusls52',
  'L2Share♫59' : 'cusls59',
  'L2Share♫55' : 'cusls55',
  'L2Share♫49' : 'cusls49',
  'L2Share♫42' : 'cusls42',
  'L2Share♫77' : 'cusls77',
  'L2Share♫79' : 'cusls79',
  'L2Share♫66' : 'cusls66',
  'L2Share♫78' : 'cusls78',
  'L2Share♫80' : 'cusls80',
  'LEEDJ - Fb: Bùi Kim Tân - DJ MR.LEE ✪' : 'cusleeddj',
  'Lenny TavÃ¡rez' : 'cuslenny',
  'Lil Tecca ✰' : 'cuslil',
  'La Casa Urbana ✅' : 'cuslacasa',
  'Liam Cleary 💯😎♛' : 'cusliam',
  'LibeikastÃ²nem' : 'cuslibeik',
  'MELHORES PAGODES ✪' : 'cusmelhores',
  'Mincafé' : 'cusmincaf',
  'MOHAMED HALIM ✪' : 'cusmohamed',
  'More Fruit 💦🌿🍎' : 'cusmore',
  'mxrÃ§h/pt.10' : 'cusmxra',
  'Music Mhragnat - ميوزك مهرجانات' : 'cusmusicmhra',
  'NESCAFÃ‰ Basement' : 'cusnescafe',
  'Nguyễn Tài Trí­' : 'cusnguya',
  'Nguyễn Công Danh' : 'cusdanh',
  'Ø´Ø¹Ø¨ÙŠ Ø³Ø§ÙˆÙ†Ø¯' : 'cusooosu',
  'ó €' : 'cusoe',
  'OFFSET â€“ FATHER OF 4 (ALBUM)' : 'cusoffset',
  'Ø¹Ù…Ø±Ùˆ' : 'cusouou',
  'Quá»³nh Anh Shin' : 'cusquanh',
  'RÃœFÃœS DU SOL' : 'cusrafa',
  'RyanMcRandal♈' : 'cusryanmc',
  'Phong Hải Nguyễn' : 'cusphong',
  'RD Urbans Music ✅' : 'cusrdurban',
  'Rodrigo LeÃ³n' : 'cusrodrigo',
  'Stickybuds~' : 'cusstickybud',
  'SangChjvas (Đích Bự)' : 'cussangchi',
  'Smooky MarGielaa 🍇' : 'cussmooth',
  'SterkÃ¸l' : 'cussterk',
  'sad frosty :(' : 'cussadfrost',
  'Sunmin Jeong_선민' : 'cussunmin',
  'TiÃ«sto' : 'custias',
  'Trapeton Tv ✅' : 'custrapton',
  'The Trap House ✅' : 'custraphou',
  'TOON KIDS MUSIC®' : 'custoon',
  'VINXEN🇰🇷' : 'cusvinxen',
  'VÅ©,' : 'cusvac',
  "Ujico*/Snail's House" : 'cusujico',
  'Văn Nguyên' : 'cusvafn',
  'Ù…Ù‡Ø±Ø¬Ø§Ù†Ø§Øª' : 'cusuusu',
  'Việt' : 'cusviat',
  'W. A. Production®' : 'cusproduction',
  'Yvng JalapeÃ±o' : 'cusyvng',
  'Wooli 🐘' : 'cuswooi',
  'weef leaks*' : 'cusweekleaks',
  'شعبي ساوند' : 'cusurdu1',
  'Тима Белорусских' : 'custennna',
  '仙水風楽' : 'cusjapene',
  'مهرجانات' : 'cusurdu2',
  'عمرو' : 'cusurdu3',
  'ANUEL AA ✅' : 'cusanuelaaa',
  '☠️SʜᴀʀᴀX Oғғɪᴄɪᴀʟ☠️' : 'cusahrax',
  '- S E C K O M -' : 'seckom',
  'ღ 슈비_[๑B T S๑]' : 'cusemojii',
  'Vũ,' : 'cusvuuu',
  'K2N ♥ K-Pop 1st' : 'cusk2nkpop',
  '\U000e0020' : 'cusunics',
    'Trippie Redd\x7f' : 'cusremovesla',
    'ʟᴜᴄᴀ ʟᴜsʜ' : 'cuslucalush',
    'Adictos A Los Corridos✅' : 'cusadictosalos',
    'ＳＨＥeＳＨ' : 'cusSheshh'
},inplace=True)

In [None]:
def remEmo(strVal):
    """Remove every character that is not an ASCII letter or digit.

    Parameters
    ----------
    strVal : str
        Raw name. Non-string values (e.g. NaN for a missing name) are
        returned unchanged instead of raising.

    Returns
    -------
    str
        ``strVal`` with all characters outside ``[A-Za-z0-9]`` removed, or
        the input itself when it is not a string.
    """
    # Missing values arrive as float NaN / None; leave them for the caller.
    if not isinstance(strVal, str):
        return strVal
    # The regex already discards all non-ASCII characters, so no separate
    # ascii encode/decode pass is needed.
    return re.sub('[^A-Za-z0-9]+', '', strVal)

In [None]:
# Strip non-alphanumeric characters from every artist name.
org_data['Name'] = org_data['Name'].transform(remEmo)

In [None]:
for i in genList:
   print(i,'------------')
   print(pd.unique(org_data[org_data['Genre'] == i ]['Name']).tolist())

In [None]:
org_data[pd.isnull(org_data['Name'])]

# Convert TimeStamp to days

In [None]:
def numOfDays(date1, reference=None):
    """Return the number of whole days between ``date1`` and a reference time.

    Parameters
    ----------
    date1 : str
        Timestamp formatted as ``'%Y-%m-%d %H:%M:%S.%f'``.
    reference : datetime, optional
        Moment to measure from. Defaults to ``datetime.now()`` at call time,
        which matches the previous behaviour; pass a fixed value to get a
        result that does not change between runs.

    Returns
    -------
    int
        ``(reference - parsed date1).days``.

    Raises
    ------
    ValueError
        If ``date1`` does not match the expected format.
    """
    parsed = datetime.strptime(date1, '%Y-%m-%d %H:%M:%S.%f')
    if reference is None:
        reference = datetime.now()
    return (reference - parsed).days

In [None]:
# Age of each record in whole days, measured from the moment this cell runs
# (numOfDays subtracts from datetime.now()), so values change between runs.
org_data['Timestamp1'] = org_data['Timestamp'].transform(lambda x : numOfDays(x));

In [None]:
# Replace each Timestamp value with a pd.Timestamp built from it.
org_data['Timestamp'] = org_data['Timestamp'].transform(lambda x : pd.Timestamp(x));

# Converting String to Int

In [None]:
def replaceUnit(val):
    """Expand a ``K`` / ``M`` suffix into the zeros it stands for.

    The unit letter is replaced by as many ``'0'`` characters as are needed
    so that, once non-digit characters are stripped, the digits read as the
    full number: ``'12K' -> '12000'``, ``'1.5K' -> '1.500'``,
    ``'1.25K' -> '1.250'``, ``'2M' -> '2000000'``.

    The previous version always assumed exactly one digit after the decimal
    point, so ``'1.25K'`` became ``'1.2500'`` (12500 after digit stripping).

    Parameters
    ----------
    val : str
        Textual number, optionally containing ``K`` (thousands) or ``M``
        (millions). ``K`` is checked first.

    Returns
    -------
    str
        ``val`` with the unit letter replaced by zeros, or ``val`` unchanged
        when it contains neither unit. If there are more fractional digits
        than the unit has zeros, no zeros are added.
    """
    for unit, magnitude in (('K', 3), ('M', 6)):
        unit_pos = val.find(unit)
        if unit_pos == -1:
            continue
        dot_pos = val.find('.')
        if dot_pos == -1 or dot_pos > unit_pos:
            fraction_digits = 0
        else:
            # Digits between the decimal point and the unit already account
            # for that many powers of ten.
            fraction_digits = sum(ch.isdigit() for ch in val[dot_pos + 1:unit_pos])
        return val.replace(unit, '0' * max(magnitude - fraction_digits, 0))
    return val

In [None]:
def strToInt(val):
    """Convert a textual count such as ``'1.5K'`` or ``'12,345'`` to a number.

    The value is first passed through ``replaceUnit`` to expand any K/M
    suffix, then every character other than the digits 0-9 is dropped and the
    remaining digit string is handed to ``pd.to_numeric``.
    """
    expanded = replaceUnit(val)
    digits_only = ''.join(re.findall(r'[0-9]+', expanded))
    return pd.to_numeric(digits_only)

In [None]:
def converThousandsToUnits(x):
    """Convert a thousands-suffixed string (``'12K'``, ``'1.5K'``) to a number.

    Without a decimal point the ``K`` is dropped and three zeros are
    appended. With one, the integer part and the fractional digits are joined
    and right-padded with zeros so the fraction occupies three places.
    """
    parts = x.split('.')
    if len(parts) > 1:
        fraction = parts[1].replace('K', '')
        padding = '0' * (3 - len(fraction))
        return pd.to_numeric(parts[0] + fraction + padding)
    return pd.to_numeric(parts[0].replace('K', '') + '000')

In [None]:
def converMillionToUnits(x):
    """Convert a millions-suffixed string (``'2M'``, ``'1.5M'``) to a number.

    Without a decimal point the ``M`` is dropped and six zeros are appended.
    With one, the integer part and the fractional digits are joined and
    right-padded with zeros so the fraction occupies six places.
    """
    parts = x.split('.')
    if len(parts) > 1:
        fraction = parts[1].replace('M', '')
        padding = '0' * (6 - len(fraction))
        return pd.to_numeric(parts[0] + fraction + padding)
    return pd.to_numeric(parts[0].replace('M', '') + '000000')

In [None]:
def convertObjectToInt(x):
    """Turn a textual count into a number.

    Checks are applied in this order, and the first match wins:
    a comma anywhere -> commas removed and parsed directly;
    a ``K`` -> ``converThousandsToUnits``;
    an ``M`` -> ``converMillionToUnits``;
    otherwise the string is parsed as-is with ``pd.to_numeric``.
    """
    if ',' in x:
        return pd.to_numeric(x.replace(',', ''))
    if 'K' in x:
        return converThousandsToUnits(x)
    if 'M' in x:
        return converMillionToUnits(x)
    return pd.to_numeric(x)

In [None]:
#for i in org_data['Likes'].unique():
   # print(i)

In [None]:
# Convert the textual Likes and Popularity columns to numbers with
# convertObjectToInt (handles ',' separators and K / M suffixes).
org_data['Likes'] = org_data['Likes'].transform(lambda x : convertObjectToInt(x));
org_data['Popularity'] = org_data['Popularity'].transform(lambda x : convertObjectToInt(x));
# NOTE(review): the disabled line below would fill 'Comments' from the
# 'Popularity' column, not from 'Comments' — check before re-enabling.
#org_data['Comments'] = org_data['Popularity'].transform(lambda x : convertObjectToInt(x));

In [None]:
isinstance(14017.0, float)

In [None]:
org_data.info()

In [None]:
org_data.head(5)

In [None]:
org_data.corr()

In [None]:
org_data['Song_Name'].unique()

In [None]:
org_data.describe().T

In [None]:
#pd.crosstab(org_train["Likes"],org_train["Views"]).div(pd.crosstab(org_train["Likes"],org_train["Views"]).sum(1), axis =0).plot(kind = "bar", stacked= True)

In [None]:
#pd.crosstab(org_train["Popularity"],org_train["Views"]).div(pd.crosstab(org_train["Popularity"],org_train["Views"]).sum(1), axis =0).plot(kind = "bar", stacked= True)

In [None]:
org_data.columns

In [None]:
# Remove the listed columns from org_data in place; everything that remains
# feeds the encoding / modelling cells below. Because the drop is in place,
# re-running this cell on the same kernel raises KeyError.
org_data.drop(columns=['Song_Name','Unique_ID','Timestamp','Country','Timestamp1','Followers'], inplace=True);

In [None]:
org_data.shape

In [None]:
sns.heatmap(org_data.corr())

In [None]:
# Split the remaining columns by dtype: non-numeric ones (to be one-hot
# encoded) and numeric ones (used as they are).
cat_col = org_data.select_dtypes(exclude=np.number)
num_col = org_data.select_dtypes(include=np.number)

In [None]:
# One-hot encode every non-numeric column (one indicator column per distinct value).
one_hot = pd.get_dummies(cat_col)

In [None]:
# Modelling frame: indicator columns followed by the numeric columns, joined side by side.
org_mod = pd.concat([one_hot,num_col], axis='columns');

In [None]:
org_mod.head(5)

In [None]:
# Rows with a Views value are treated as labelled training data; rows where
# Views is missing are the rows to predict.
# presumably the test CSV has no Views column, so those rows became NaN in the
# earlier concat — verify against Data_Test.csv.
org_mod_train= org_mod[org_mod['Views'].notna()]
org_mod_test= org_mod[org_mod['Views'].isna()]
print(org_mod.shape,org_mod_train.shape,org_mod_test.shape)

In [None]:
# Features / target for the labelled rows.
org_mod_train_x = org_mod_train.drop(columns='Views')
org_mod_train_y = org_mod_train['Views']
# Hold out 30% of the labelled rows for validation; random_state fixes the split.
x_train_split,x_test_split,y_train_split,y_test_split=train_test_split(org_mod_train_x,org_mod_train_y,test_size=0.3, random_state = 0)
# Features of the unlabelled rows (Views is all-missing there, so it is dropped).
org_mod_test_x = org_mod_test.drop(columns='Views')

In [None]:
# Module-level accumulators shared by the model-fitting helpers.
# Rmse_score and AlgorthimName are only referenced from commented-out lines
# in the visible cells.
Rmse_score = [];
AlgorthimName = [];
# Train / hold-out RMSE values; model_fit appends one entry to each per call
# and nothing in the visible cells resets them.
train_rmse = [];
test_rmse = [];
def model_fit(model,x_train,y_train,x_testSplit,y_testSplit,x_test,algorthimName,fileName,paramName='',paramValue='',plot=False,exportFile = False):
    """Fit ``model``, report train / hold-out scores, optionally plot and export.

    Parameters
    ----------
    model : estimator exposing ``fit(X, y)`` and ``predict(X)``.
    x_train, y_train : training features and target.
    x_testSplit, y_testSplit : hold-out features and target used for scoring.
    x_test : features of the unlabelled rows to predict.
    algorthimName : str
        Label used in the printed RMSE lines.
    fileName : str
        Excel file written when ``exportFile`` is true.
    paramName : str, optional
        Column / x-axis name for the tuning plot. Only used when ``plot`` is
        true. Defaults to ``''`` so callers that do not plot may omit it.
    paramValue : sequence, optional
        Hyper-parameter values tried so far; only used when ``plot`` is true.
    plot : bool, optional
        When true, plot the module-level ``train_rmse`` / ``test_rmse``
        lists against ``paramValue``.
    exportFile : bool, optional
        When true, write ``Unique_ID`` (from ``org_test``) and the predicted
        ``Views`` to ``fileName``.

    Side effects
    ------------
    Appends the train and hold-out RMSE to the module-level ``train_rmse``
    and ``test_rmse`` lists on every call.
    """
    model.fit(x_train, y_train)
    y_train_predicted = model.predict(x_train)
    y_test_split_pred = model.predict(x_testSplit)
    y_test_predicted = model.predict(x_test)
    print(" R2 Score :",r2_score(y_train,y_train_predicted))
    print(" R2 Score Test:",r2_score(y_testSplit,y_test_split_pred))
    rm_Score = np.sqrt(mean_squared_error(y_train,y_train_predicted))
    rm_ScoreTest = np.sqrt(mean_squared_error(y_testSplit,y_test_split_pred))
    # The stray `model.sco` attribute access that used to sit here raised
    # AttributeError on every call and has been removed.
    train_rmse.append(rm_Score)
    test_rmse.append(rm_ScoreTest)
    print('RMSE Score of {}'.format(algorthimName), rm_Score)
    print('RMSE Score of Test {}'.format(algorthimName), rm_ScoreTest)
    if plot:
        # Uses the full accumulated RMSE lists, so their length must match
        # paramValue for the rows to line up.
        resut = pd.DataFrame([paramValue,train_rmse,test_rmse]).T
        resut.columns = [paramName, "train", "test"]
        resut.plot(x = paramName,y=["train","test"])
    if exportFile:
        y_test_predicted_df = pd.DataFrame(y_test_predicted,columns=["Views"])
        result = pd.concat([org_test[['Unique_ID']],y_test_predicted_df],axis=1)
        result.to_excel(fileName,index=False)

In [None]:
def model_fit_old(model,x_train,y_train,x_test,algorthimName,fileName,exportFile = False):
    """Fit ``model`` on the full training data and print its training scores.

    Prints the training R2 and RMSE (labelled with ``algorthimName``). When
    ``exportFile`` is true, the predictions for ``x_test`` are written next
    to ``org_test['Unique_ID']`` into the Excel file ``fileName``.
    """
    model.fit(x_train, y_train)
    fitted_train = model.predict(x_train)
    predicted_views = model.predict(x_test)

    print(" R2 Score :", r2_score(y_train, fitted_train))
    training_rmse = np.sqrt(mean_squared_error(y_train, fitted_train))
    print('RMSE Score of {}'.format(algorthimName), training_rmse)

    if not exportFile:
        return
    submission = pd.concat(
        [org_test[['Unique_ID']], pd.DataFrame(predicted_views, columns=["Views"])],
        axis=1,
    )
    submission.to_excel(fileName, index=False)

# Linear Regression
Linear models with and without regularization; no feature scaling is applied.

In [None]:
linear = LinearRegression()
#model_fit_old(linear,org_mod_train_x,org_mod_train_y,org_mod_test_x,'Linear Regression','linear2.xlsx')
#model_fit(linear,x_train_split,y_train_split,x_test_split,y_test_split,org_mod_test_x,'Linear Regression','linear1.xlsx','','')

In [None]:
#lasso = Lasso()
#model_fit(lasso,org_mod_train_x,org_mod_train_y,org_mod_test_x,'Lasso Linear Regression','lasso.xlsx')

In [None]:
#ridge = Ridge()
#model_fit(ridge,org_mod_train_x,org_mod_train_y,org_mod_test_x,'Ridge Linear Regression','ridge.xlsx')

In [None]:
#elastic = ElasticNet()
#model_fit(elastic,org_mod_train_x,org_mod_train_y,org_mod_test_x,'elastic Linear Regression','elstic.xlsx')

In [None]:
#poly = PolynomialFeatures()
#poly_train_x = poly.fit_transform(org_mod_train_x)
#poly_test_x = poly.fit_transform(org_mod_test_x)
#linear = LinearRegression()
#model_fit_poly(linear,poly_train_x,org_mod_train_y,poly_test_x,'Polynomial','polynomial.xlsx')

# OLS (Ordinary Least Squares)

In [None]:
# Fit a statsmodels OLS model on all labelled rows and report the training RMSE.
# NOTE(review): the feature frame is passed as-is; statsmodels' OLS does not
# add an intercept column by itself — confirm that is intended.
olsModel = sm.OLS(org_mod_train_y, org_mod_train_x).fit()
y_train_pred = olsModel.predict(org_mod_train_x)
# Predictions for the unlabelled rows; not used further in the visible cells.
y_test_pred = olsModel.predict(org_mod_test_x)
rm_Score = np.sqrt(mean_squared_error(org_mod_train_y,y_train_pred));
print('RMSE Score of OLS', rm_Score)

# Gradient Descent

In [None]:
#sgd = SGDRegressor()
#model_fit(sgd,org_mod_train_x,org_mod_train_y,org_mod_test_x,'Gradient Descent','sgd.xlsx')

# Random Forest

In [None]:
rnd = RandomForestRegressor();
#model_fit_old(rnd,org_mod_train_x,org_mod_train_y,org_mod_test_x,'Random Forest','rnd1.xlsx')
#model_fit(rnd,x_train_split,y_train_split,x_test_split,y_test_split,org_mod_test_x,'Random Forest','rnd.xlsx','','')

# Boosting

In [None]:
#adaBoost = AdaBoostRegressor()
#model_fit(adaBoost,org_mod_train_x,org_mod_train_y,org_mod_test_x,'Ada Boosting','ada.xlsx')

In [None]:
gauBoost = GradientBoostingRegressor()
#model_fit_old(gauBoost,org_mod_train_x,org_mod_train_y,org_mod_test_x,'Graudient Boosting','gdb1.xlsx')
#model_fit(gauBoost,x_train_split,y_train_split,x_test_split,y_test_split,org_mod_test_x,'Graudient Boosting','gdb.xlsx','','')

# Support Vector Machine

In [None]:
#svm = SVR(kernel='linear')
#model_fit(svm,org_mod_train_x,org_mod_train_y,org_mod_test_x,'SVM','svm.xlsx')

In [None]:
svmLinear = LinearSVR()
#model_fit_old(svmLinear,org_mod_train_x,org_mod_train_y,org_mod_test_x,'SVM Linear','svmLinear.xlsx')
#model_fit(svmLinear,x_train_split,y_train_split,x_test_split,y_test_split,org_mod_test_x,'SVM Linear','svmLinear.xlsx','','')

# Decision Tree
If `criterion = 'mse'`, there is no need to apply hyperparameter tuning.

In [None]:
# Baseline decision-tree regressor using the mean-squared-error split criterion.
# NOTE(review): newer scikit-learn releases name this criterion 'squared_error'
# and reject 'mse' — confirm against the installed version.
decisionTree = DecisionTreeRegressor(criterion='mse')
#model_fit_old(decisionTree,org_mod_train_x,org_mod_train_y,org_mod_test_x,'Decision Tree Regression','decision1.xlsx')
#model_fit(decisionTree,x_train_split,y_train_split,x_test_split,y_test_split,org_mod_test_x,'Decision Tree Regression','decision1.xlsx','','')

In [None]:
learning_rate = [0.1,0.05,0.01,0.001]
def check_learn(lRate):
    """Fit one GradientBoostingRegressor per learning rate and print its scores.

    Parameters
    ----------
    lRate : iterable of float
        Learning rates to try, one model per value.
    """
    for i in lRate:
        print("Learning Rate ----------------------- = ",i)
        dt = GradientBoostingRegressor(criterion='friedman_mse',learning_rate=i)
        # Fit the estimator configured with this learning rate. Previously the
        # shared default `gauBoost` was fitted, so every iteration evaluated
        # the same model.
        model_fit_old(dt,org_mod_train_x,org_mod_train_y,org_mod_test_x,'Graudient Boosting', 'gdb.xlsx')
#check_learn(learning_rate)

In [None]:
n_estimate = [15,20,25,30]
def check_dept(max_depth_check):
    """Fit one GradientBoostingRegressor per ``n_estimators`` value.

    Despite the parameter name, the values are used as ``n_estimators``.
    A later cell defines another function with this same name, which
    replaces this one once that cell has run.

    Parameters
    ----------
    max_depth_check : iterable of int
        Numbers of boosting stages to try.
    """
    for i in max_depth_check:
        print("n_estimate ----------------------- = ",i)
        dt = GradientBoostingRegressor(criterion='friedman_mse',learning_rate=0.1,n_estimators=i)
        # Fit the estimator built for this value (previously the unrelated
        # default `gauBoost` was fitted). paramName / paramValue are passed
        # explicitly as '' because no plot is requested; leaving them out
        # raised TypeError.
        model_fit(dt,x_train_split,y_train_split,x_test_split,y_test_split,org_mod_test_x,'Graudient Boosting','gdb.xlsx','','')
#check_dept(n_estimate)

In [None]:
max_depth_check = [10,20,30]
def check_dept(max_depth_check):
    """Fit one DecisionTreeRegressor per ``max_depth`` value via ``model_fit``.

    The train/test RMSE plot is requested only on the final value, and no
    file is exported.
    """
    for indexi, i in enumerate(max_depth_check, start=1):
        print("Max_ Depth ----------------------- = ",i,indexi)
        dt = DecisionTreeRegressor(max_depth=i)
        # Plot once, after the last depth has been evaluated.
        is_last = len(max_depth_check) == indexi
        model_fit(
            dt,
            x_train_split, y_train_split,
            x_test_split, y_test_split,
            org_mod_test_x,
            'Decision', 'decisionHyp.xlsx',
            'max_depth_check', max_depth_check,
            is_last, False,
        )
#check_dept(max_depth_check)

In [None]:
min_samples_split = [0.1, 1.0, 10, 20]
def check_split(min_samples_split):
    """Fit one GradientBoostingRegressor per ``min_samples_split`` value.

    Parameters
    ----------
    min_samples_split : iterable
        Values passed straight to ``GradientBoostingRegressor(min_samples_split=...)``.
    """
    for i in min_samples_split:
        print("Minimun Sample split ----------------------- = ",i)
        dt = GradientBoostingRegressor(criterion='friedman_mse',min_samples_split=i)
        # Fit the estimator built for this value (previously the unrelated
        # default `gauBoost` was fitted). paramName / paramValue are passed
        # explicitly as '' because no plot is requested; leaving them out
        # raised TypeError.
        model_fit(dt,x_train_split,y_train_split,x_test_split,y_test_split,org_mod_test_x,'Graudient Boosting','gdb.xlsx','','')
#check_split(min_samples_split)

In [None]:
# Hyper-parameter grid for the decision-tree regressor searched below.
# NOTE(review): newer scikit-learn releases name the criterion 'squared_error'
# and reject 'mse' — confirm against the installed version.
dParams = {
    "criterion": ["mse"],
    "min_samples_split": [10, 20, 40],
    "max_depth": [8,10,12,14,18,20],
    "min_samples_leaf": [20, 40, 100],
    "max_leaf_nodes": [5, 20, 100]
    }
# Grid for a Lasso model; not used anywhere in the visible cells.
# NOTE(review): the "normalize" option may not exist in the installed
# scikit-learn version — confirm before using this grid.
lasParams  ={
    "alpha" : [0.1, 0.5, 1],
    "normalize" : [True]
}
# Exhaustive search over dParams with 5-fold cross-validation on the training split.
dt = DecisionTreeRegressor()
gridCV = GridSearchCV(dt,dParams,cv=5)
gridCV.fit(x_train_split,y_train_split)