In [61]:
#Data Manipulation libraries
import pandas as pd
import numpy as np

# Visualization libraries
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Predictive Analytics models
import sklearn
from sklearn import linear_model
from sklearn.metrics import mean_squared_error
from sklearn.model_selection  import train_test_split

#loading the datasets
speedstar_train = pd.read_csv('train.csv') 

speedstar_test = pd.read_csv('test.csv') 

In [62]:
#function to remove string values by converting their values to 0s and 1s
def codex (speedstar_input):
  cat_vars=['Job','Marital','Education','Default','HHInsurance','CarLoan',\
            'Communication','LastContactMonth','PrevAttempts']
  for var in cat_vars:
    cat_list = pd.get_dummies(speedstar_train[var], prefix=var)
    speedstar_input = pd.merge(speedstar_input,cat_list,left_index=True, right_index=True)
  speedstar_input.drop(cat_vars,axis=1,inplace=True)
  return speedstar_input

In [63]:
#function to remove string values by converting their values to 0s and 1s
def codex (speedstar_input):
  cat_vars=['Job','Marital','Education','Default','HHInsurance','CarLoan',\
            'Communication','LastContactMonth','PrevAttempts']
  for var in cat_vars:
    cat_list = pd.get_dummies(speedstar_test[var], prefix=var)
    speedstar_input = pd.merge(speedstar_input,cat_list,left_index=True, right_index=True)
  speedstar_input.drop(cat_vars,axis=1,inplace=True)
  return speedstar_input

In [64]:
#applying the functions
speedstar_train = codex(speedstar_train)
speedstar_test = codex(speedstar_test)

In [65]:
#Check for the column mismatch
speedstar_test_columns= set(speedstar_test.columns.to_list())
speedstar_train_columns= set(speedstar_train.columns.to_list())
speedstar_train_columns.difference(speedstar_test_columns)

{'CarInsurance'}

In [66]:
#setting the target column
speedstar_y = speedstar_train['CarInsurance']
speedstar_x = speedstar_train[speedstar_test.columns]

In [67]:
#function for converting the time columns to durations
def getduration (speedstar_input):
  speedstar_input['CallStart'] = pd.to_datetime(speedstar_input['CallStart'])
  speedstar_input['CallEnd'] = pd.to_datetime(speedstar_input['CallEnd'])

  speedstar_input['Duration'] = speedstar_input['CallEnd'] - speedstar_input['CallStart']
  speedstar_input.drop(['CallStart','CallEnd'],axis=1,inplace=True)
  speedstar_input['Duration'] = speedstar_input['Duration'].dt.total_seconds()
  return speedstar_input



In [None]:
#applying function to datasets
speedstar_x= getduration(speedstar_x)
speedstar_test = getduration(speedstar_test)

In [69]:
#drop Id column
speedstar_x.drop(['Id'],axis=1,inplace=True)

In [70]:
#drop Id column
speedstar_test.drop(['Id'],axis=1,inplace=True)

In [71]:
#setting the values for Outcome to float
d = {'failure': -1, 'success': 1, 'other': 0,'':0}
speedstar_x['Outcome'] = speedstar_x['Outcome'].map(d)

In [77]:
#setting the values for Outcome to float
d = {'failure': -1, 'success': 1, 'other': 0,'':0}
speedstar_test['Outcome'] = speedstar_test['Outcome'].map(d)

In [None]:
#clf = LogisticRegression(random_state=0, max_iter = 1000).fit(df_x, df_y)
speedstar_x.columns[speedstar_x.isna().any()].tolist()
#df_x['Outcome'] = df_x['Outcome'].fillna(0)


In [79]:
#Removing NA columns
speedstar_test['Outcome'] = speedstar_test['Outcome'].fillna(0)
speedstar_x['Outcome'] = speedstar_x['Outcome'].fillna(0)


In [None]:
#Apply Logistic Regression
from sklearn.linear_model import LogisticRegression
clf = LogisticRegression(random_state=0, max_iter = 1000).fit(speedstar_x, speedstar_y)
#print(df_x['Outcome'].head)

In [81]:
#predict result
speedstar_results = clf.predict(speedstar_test)

In [82]:
#Adding value to list
speedstar_results_list=speedstar_results.tolist()

In [83]:
#Adding the Id column back
speedstar_results_list=[('CP' + str(i),val) for i,val in enumerate(speedstar_results_list)]

In [84]:
print(speedstar_results_list)

[('CP0', 0), ('CP1', 0), ('CP2', 1), ('CP3', 0), ('CP4', 0), ('CP5', 1), ('CP6', 1), ('CP7', 0), ('CP8', 0), ('CP9', 0), ('CP10', 0), ('CP11', 0), ('CP12', 1), ('CP13', 0), ('CP14', 0), ('CP15', 0), ('CP16', 0), ('CP17', 0), ('CP18', 0), ('CP19', 0), ('CP20', 0), ('CP21', 0), ('CP22', 1), ('CP23', 0), ('CP24', 0), ('CP25', 1), ('CP26', 1), ('CP27', 0), ('CP28', 1), ('CP29', 0), ('CP30', 0), ('CP31', 1), ('CP32', 0), ('CP33', 0), ('CP34', 0), ('CP35', 1), ('CP36', 0), ('CP37', 0), ('CP38', 0), ('CP39', 0), ('CP40', 0), ('CP41', 0), ('CP42', 0), ('CP43', 0), ('CP44', 1), ('CP45', 0), ('CP46', 0), ('CP47', 1), ('CP48', 0), ('CP49', 0), ('CP50', 0), ('CP51', 1), ('CP52', 0), ('CP53', 1), ('CP54', 0), ('CP55', 0), ('CP56', 0), ('CP57', 1), ('CP58', 0), ('CP59', 0), ('CP60', 0), ('CP61', 0), ('CP62', 1), ('CP63', 0), ('CP64', 0), ('CP65', 0), ('CP66', 0), ('CP67', 1), ('CP68', 1), ('CP69', 0), ('CP70', 0), ('CP71', 1), ('CP72', 1), ('CP73', 0), ('CP74', 0), ('CP75', 0), ('CP76', 0), ('CP77',

In [85]:
#creating final dataframe for required columns
speedstar_final = pd.DataFrame(speedstar_results_list,columns=['Id','CarInsurance'])


In [86]:
#Writing to a CSV file
speedstar_final.to_csv("Spectrum_logistic.csv")

In [95]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV
params = {'learning_rate':[0.15,0.1,0.05,0.01],'n_estimators':[20,40,60,80,100]}
speedstar_gradient = GradientBoostingClassifier(max_depth=2, random_state = 0).fit(speedstar_x,speedstar_y)
final_model= GridSearchCV(estimator = speedstar_gradient, param_grid=params,cv=5)
final_model=final_model.fit(speedstar_x,speedstar_y)
y_gradient_new = final_model.predict(speedstar_test)
speedstar_results_list=y_gradient_new.tolist()
speedstar_results_list=[('CP' + str(i),val) for i,val in enumerate(speedstar_results_list)]
speedstar_grid = pd.DataFrame(speedstar_results_list,columns=['Id','CarInsurance'])
speedstar_grid.to_csv("Spectrum_grid.csv")