In [None]:
pip install autoviz

In [None]:
# AutoViz provides the automated exploratory plots produced further down.
from autoviz.AutoViz_Class import AutoViz_Class
AV = AutoViz_Class()  # one instance, reused by the AV.AutoViz(...) calls on the train and test frames

In [None]:
%tensorflow_version 2.x
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb
from google.colab import files
import io
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score, cross_val_predict
from sklearn.preprocessing import LabelEncoder, OneHotEncoder,OrdinalEncoder
from sklearn.model_selection import StratifiedKFold, GridSearchCV, RepeatedStratifiedKFold, RepeatedKFold, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler, KBinsDiscretizer, RobustScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression, RidgeClassifier, SGDClassifier, PassiveAggressiveClassifier
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import BernoulliNB, GaussianNB
from sklearn.ensemble import RandomForestClassifier, VotingClassifier, AdaBoostClassifier, BaggingClassifier, ExtraTreesClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
import xgboost as xgb
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.metrics import accuracy_score, make_scorer, confusion_matrix

In [None]:
# Upload the data through google.colab's file picker. The result is indexed by
# filename in the next cell, which expects 'train.csv' and 'test.csv'.
file = files.upload()

In [None]:
# Parse both uploaded files (held in memory as bytes) into DataFrames,
# train first, then test.
df_train, df_test = (
    pd.read_csv(io.BytesIO(file[csv_name]))
    for csv_name in ('train.csv', 'test.csv')
)

## Train

In [None]:
# Display the training frame (still includes the 'price_range' target at this point).
df_train

In [None]:
# Summary statistics for the training columns.
df_train.describe()

In [None]:
# Count of missing values per training column.
df_train.isna().sum()

In [None]:
# Column names of the training frame.
df_train.columns

In [None]:
# Automated exploratory plots for the training frame with 'price_range' as the
# dependent variable. The frame is passed directly via `dfte`; the empty first
# argument is presumably the unused filename slot (verify against AutoViz docs).
af = AV.AutoViz("",dfte=df_train,verbose=1,depVar='price_range', lowess=True)

In [None]:
# List the distinct values found in each column of the training frame.
for column, values in df_train.items():
  print("Column: {}, Unique: {}".format(column, values.unique()))

In [None]:
# Dtypes, non-null counts and memory usage of the training frame.
df_train.info()

In [None]:
# Distribution of clock_speed within each price_range class.
sb.boxplot(data=df_train, x='price_range', y='clock_speed')

In [None]:
# Distribution of m_dep within each price_range class.
sb.boxplot(data=df_train, x='price_range', y='m_dep')

In [None]:
# Distribution of n_cores within each price_range class.
sb.boxplot(data=df_train, x='price_range', y='n_cores')

In [None]:
# Pairwise correlation between all training columns (the 'price_range' target
# has not been split off yet), shown as a colour-graded table with two decimals.
correlation = df_train.corr()
# Styler.set_precision was deprecated in pandas 1.3 and removed in 2.0; an
# explicit format string renders the same two decimals on old and new pandas.
correlation.style.background_gradient(cmap='PuBu').format("{:.2f}")

In [None]:
# Annotated heatmap of the correlation matrix computed in the previous cell;
# the 15x15 inch figure leaves room for the per-cell annotations.
fig, ax = plt.subplots(figsize=(15,15)) 
sb.heatmap(correlation, annot=True, cmap='PuBu', linewidths=1.5, ax=ax)

## Test

In [None]:
# Remove the identifier column so df_test is left without 'id' before it is
# inspected and passed to the model.
# Rebinding (instead of inplace=True) together with errors='ignore' keeps the
# cell re-runnable: the in-place version raised KeyError on a second execution
# because 'id' had already been removed.
df_test = df_test.drop(columns=['id'], errors='ignore')

In [None]:
# Display the test frame after the 'id' column has been dropped.
df_test

In [None]:
# Summary statistics for the test columns.
df_test.describe()

In [None]:
# Dtypes, non-null counts and memory usage of the test frame.
df_test.info()

In [None]:
# Count of missing values per test column.
df_test.isna().sum()

In [None]:
# Column names of the test frame.
df_test.columns

In [None]:
# Automated exploratory plots for the test frame; no depVar is given because
# the test data carries no 'price_range' column to plot against.
af_test = AV.AutoViz("",dfte=df_test,verbose=1, lowess=True)

In [None]:
# List the distinct values found in each column of the test frame.
for column, values in df_test.items():
  print("Column: {}, Unique: {}".format(column, values.unique()))

In [None]:
# Pairwise correlation between the test columns, shown as a colour-graded table
# with two decimals. Note: this rebinds `correlation`, replacing the matrix
# previously computed from the training frame.
correlation = df_test.corr()
# Styler.set_precision was deprecated in pandas 1.3 and removed in 2.0; an
# explicit format string renders the same two decimals on old and new pandas.
correlation.style.background_gradient(cmap='PuBu').format("{:.2f}")

In [None]:
# Annotated heatmap of whatever `correlation` currently holds (the test-frame
# matrix when the cells are run in order).
fig, ax = plt.subplots(figsize=(15,15)) 
sb.heatmap(correlation, annot=True, cmap='PuBu', linewidths=1.5, ax=ax)

## Labels

In [None]:
# Split the target off the training frame: afterwards df_train holds only the
# feature columns and labels_train holds 'price_range'.
# The guard makes the cell safe to re-run; an unconditional pop raised KeyError
# on the second execution because the column had already been removed.
if 'price_range' in df_train.columns:
    labels_train = df_train.pop('price_range')
labels_train

## Classifier


*   Different Classifier Testing

---



---







*   Classifier



In [None]:
# One fixed seed for every estimator that draws random numbers, so the
# cross-validation scores printed below are reproducible between runs.
SEED = 42

# XGBoost settings shared by the voting member and the standalone candidate.
XGB_PARAMS = dict(objective='multi:softmax', learning_rate=0.2,
                  n_estimators=100, max_depth=3, random_state=SEED)

# Base learners used as members of the voting ensembles.
KNC = KNeighborsClassifier()
XGB = xgb.XGBClassifier(**XGB_PARAMS)
BNB = BernoulliNB()
GNB = GaussianNB()
DTC = DecisionTreeClassifier(random_state=SEED)
ETC = ExtraTreeClassifier(random_state=SEED)
RFC = RandomForestClassifier(random_state=SEED)
ETsC = ExtraTreesClassifier(random_state=SEED)
GBC = GradientBoostingClassifier(random_state=SEED)


# (name, estimator) pairs in the form VotingClassifier expects.
estimator = [('kneighbor', KNC),
             ('xgb', XGB),
             ('bernoulli', BNB),
             ('gaussian', GNB),
             ('decisiontree', DTC),
             ('extratree', ETC),
             ('randomforest', RFC),
             ('extratrees', ETsC),
             ('gradientboosting', GBC),
             ]


# Stand-alone candidates; these are fresh instances, independent of the
# voting members above.
classifier = [KNeighborsClassifier(),
              xgb.XGBClassifier(**XGB_PARAMS),
              RidgeClassifier(),
              SGDClassifier(random_state=SEED),
              PassiveAggressiveClassifier(random_state=SEED),
              BernoulliNB(),
              GaussianNB(),
              DecisionTreeClassifier(random_state=SEED),
              ExtraTreeClassifier(random_state=SEED),
              RandomForestClassifier(random_state=SEED),
              LinearSVC(random_state=SEED),
              SVC(),
              ExtraTreesClassifier(random_state=SEED),
              GradientBoostingClassifier(random_state=SEED)
              ]

# Ensemble candidates: hard and soft voting over the base learners, plus
# boosting and bagging with 100 estimators each.
ensemble = [
            VotingClassifier(estimators=estimator, voting='hard'),
            VotingClassifier(estimators=estimator, voting='soft'),
            AdaBoostClassifier(n_estimators=100, random_state=SEED),
            BaggingClassifier(n_estimators=100, random_state=SEED)
]



---





*   Ensemble


In [None]:
# Score every ensemble with 10-fold stratified cross-validation.
# cross_val_score clones the estimator and fits the clone on each fold, so the
# extra fit on the full training set that used to precede it was never used
# by the score; it has been removed to avoid the wasted training time.
for model in ensemble:
    kfold1 = StratifiedKFold(n_splits=10)
    results_kfold = cross_val_score(model, df_train, labels_train, cv=kfold1)
    print("Accuracy: %.2f%%" % (results_kfold.mean()*100.0))
    print("{} Done".format(model))
    print('\n')



*   Baseline check: each individual classifier on the unscaled features



In [None]:
# Score every stand-alone classifier on the raw features with 10-fold
# stratified cross-validation.
# cross_val_score clones the estimator and fits the clone on each fold, so the
# extra fit on the full training set that used to precede it was never used
# by the score; it has been removed to avoid the wasted training time.
for est in classifier:
  print(est)
  kfold1 = StratifiedKFold(n_splits=10)
  results_kfold = cross_val_score(est, df_train, labels_train, cv=kfold1)
  print("Accuracy: %.2f%%" % (results_kfold.mean()*100.0))
  print('\n')



*   Pipeline: the same classifiers with feature scaling (RobustScaler, then StandardScaler) applied first


In [None]:
# Score every stand-alone classifier again, this time behind feature scaling,
# with 10-fold stratified cross-validation.
# Passing the whole pipeline to cross_val_score means the scalers are fitted on
# each fold's training part only. The pipeline is cloned and refitted per fold,
# so the previous fit on the full training set was unused work and is omitted.
for est in classifier:
  print(est)

  pipe = Pipeline([
                 ('robust', RobustScaler()),
                 # NOTE(review): StandardScaler re-centres and re-scales each
                 # feature, which likely makes the preceding RobustScaler
                 # redundant - confirm before keeping both.
                 ('standard', StandardScaler()),
                 ('estimator', est)
  ])
  kfold1 = StratifiedKFold(n_splits=10)
  results_kfold = cross_val_score(pipe, df_train, labels_train, cv=kfold1)
  print("Accuracy: %.2f%%" % (results_kfold.mean()*100.0))
  print('\n')



---



---



In [None]:
# Final model: XGBoost with the same hyper-parameters as the XGB candidate
# evaluated in the comparison cells above.
model = xgb.XGBClassifier(objective='multi:softmax', learning_rate=0.2, n_estimators=100, max_depth=3)



*   Fit



In [None]:
# Train the final model on the full training set (features in df_train,
# target in labels_train); the fitted estimator is rebound to `model`.
model = model.fit(df_train,labels_train)



*   K-Fold CV



In [None]:
# Cross-validated accuracy estimate for the final model: ten stratified folds,
# reported as the mean fold score in percent.
kfold1 = StratifiedKFold(n_splits=10)
results_kfold = cross_val_score(
    estimator=model,
    X=df_train,
    y=labels_train,
    cv=kfold1,
)
print("Accuracy: %.2f%%" % (results_kfold.mean()*100.0))



*   Predict



In [None]:
# Predicted price_range class for every row of the test frame.
f2 = model.predict(df_test)





##  Result



In [None]:
# Empty result frame with the two output columns; it is filled in the next cell.
final = pd.DataFrame(columns=['id','price_range'])

In [None]:
# The original 'id' column was dropped from df_test earlier, so ids are rebuilt
# from the row position (index + 1).
# NOTE(review): this assumes the test ids were consecutive and started at 1 - confirm against test.csv.
final['id'] = df_test.index+1
final['price_range'] = f2  # predictions in the same row order as df_test

In [None]:
# Display the id / predicted price_range table.
final