In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
# The unused sub-directory list is named `_subdirs` rather than `_` so the loop
# does not rebind `_`, which IPython uses for the most recent cell output.
for dirname, _subdirs, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# Any results you write to the current directory are saved as output.

In [None]:
# Load the AMDvIntel CSV into a DataFrame and print its column/dtype summary
import pandas as pd 
df = pd.read_csv(filepath_or_buffer="../input/amd-vs-intel/AMDvIntel.csv")
df.info()

In [None]:
# Preview the first rows of the raw data
df.head()

In [None]:
# Summary statistics of the columns
df.describe()

In [None]:
# Drop Name and Price because they are not used as classification features.
# errors='ignore' keeps this cell safe to re-run: df is reassigned here, so a
# second execution would otherwise raise KeyError on the already-removed columns.
df = df.drop(columns=['Name', 'Price'], errors='ignore')
df.head()

In [None]:
# Overview of the columns: a grid of 20-bin histograms, followed by
# pandas' default plot of the whole frame
df.hist(figsize=(20, 20), bins=20)
df.plot()

In [None]:
# Compare each feature against the target variable (IorA) with one bar plot per feature

import matplotlib.pyplot as plt
import seaborn as sns

plt.figure(figsize=(15,20))

# Fill a 2x2 grid, one panel per feature, in the order listed
for panel, feature in enumerate(['Cache(M)', 'Cores', 'Threads', 'Speed(GHz)'], start=1):
    plt.subplot(2, 2, panel)
    sns.barplot(x=df['IorA'], y=df[feature])

plt.show()

In [None]:
# Frequency of each distinct value, one stacked panel per feature

plt.figure(figsize=(15,30))

for row, column in enumerate(['Cache(M)', 'Cores', 'Threads', 'Speed(GHz)'], start=1):
    plt.subplot(4, 1, row)
    df[column].value_counts().plot(kind='bar')

plt.show()

In [None]:
# Count of each feature value, split by the target variable (IorA)
plt.figure(figsize=(15,30))

for row, column in enumerate(['Cache(M)', 'Cores', 'Threads', 'Speed(GHz)'], start=1):
    plt.subplot(4, 1, row)
    sns.countplot(x=column, hue='IorA', data=df)

plt.show()

In [None]:
# Pairwise correlation matrix rendered as a colour-graded table (gradient applied per row)
# NOTE(review): if any remaining column is non-numeric, newer pandas versions raise
# in df.corr() unless numeric_only=True is passed — confirm the dtypes shown by df.info()
(
    df.corr()
      .style
      .format("{:.2}")
      .background_gradient(cmap=plt.get_cmap('coolwarm'), axis=1)
)

In [None]:
# Re-check columns and dtypes before building the feature matrix
df.info()

In [None]:
# List the column labels (their order matters for the positional slice used to build X)
df.columns

In [None]:
# Define the feature matrix X: columns at positions 1..4 of df
# NOTE(review): positional selection depends on column order; presumably position 0
# is the target 'IorA' and 1..4 are the four hardware features — verify against df.columns
X = df.iloc[:,1:5] 
X.head()

In [None]:
# Target as a 1-D Series: scikit-learn classifiers expect y of shape (n_samples,)
# and emit a DataConversionWarning when handed a single-column DataFrame.
y = df['IorA']
y.head()

In [None]:
# Hold out 30% of the rows as the test set; the fixed random_state makes the split repeatable
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=10,
)

In [None]:
# Shape of the training features
X_train.shape

In [None]:
# Shape of the test features
X_test.shape

In [None]:
# Shape of the training target
y_train.shape

In [None]:
# Shape of the test target
y_test.shape

# Logistic Regression

In [None]:
# Fit a logistic-regression classifier with default settings on the training split.
# The metric helpers imported here are reused by the evaluation cells of every model.
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix,classification_report,accuracy_score

model_logr = LogisticRegression()
model_logr.fit(X=X_train, y=y_train)

In [None]:
# Predict labels for the test features with the fitted logistic-regression model
y_predict_log = model_logr.predict(X_test)

In [None]:
# Accuracy of the logistic-regression predictions on the test set
print('Logistics Test Accuracy:', accuracy_score(y_true=y_test, y_pred=y_predict_log))

In [None]:
# Classification report (precision, recall, ...) for the logistic-regression predictions

from sklearn.metrics import classification_report
print(classification_report(y_true=y_test, y_pred=y_predict_log))

In [None]:
# Confusion matrix of true vs. predicted test labels (logistic regression)
confusion_matrix(y_true=y_test, y_pred=y_predict_log)

# Support Vector Machine

In [None]:
# Tune an RBF-kernel SVM over a small gamma/C search space with randomized search.
from sklearn.svm import SVC
from sklearn.model_selection import RandomizedSearchCV

parameters = {'kernel': ['rbf'], 'gamma': [0.1,1,5], 'C': [0.1,1,10,100]}

# random_state pins which candidates are sampled: with the default n_iter=10 only
# 10 of the 12 combinations are tried, so without a seed best_params_ (and the
# reported test metrics) can change from run to run.
rbf_svc = RandomizedSearchCV(SVC(), parameters, random_state=10).fit(X_train,y_train)

In [None]:
# Hyperparameter combination selected by the randomized search
print('Best Parameter',rbf_svc.best_params_)

In [None]:
# Predict labels for the test features with the tuned SVM search object
y_predict_svm = rbf_svc.predict(X_test)

In [None]:
# Accuracy of the SVM predictions on the test set
print('SVM Test Accuracy:', accuracy_score(y_true=y_test, y_pred=y_predict_svm))

In [None]:
# Classification report (precision, recall, ...) for the SVM predictions
print(classification_report(y_true=y_test, y_pred=y_predict_svm))

In [None]:
# Confusion matrix of true vs. predicted test labels (SVM)
confusion_matrix(y_true=y_test, y_pred=y_predict_svm)

# Decision Tree with GridSearchCV

In [None]:
# Grid-search a decision tree over min_samples_split for each split criterion,
# scored by accuracy with 10-fold cross-validation
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV

classifier_dtg = DecisionTreeClassifier(splitter='best', random_state=42)

# One sub-grid per criterion, in the order gini then entropy
parameters = [
    {'min_samples_split': [2, 3, 4, 5], 'criterion': [criterion]}
    for criterion in ('gini', 'entropy')
]

model_dectree = GridSearchCV(
    estimator=classifier_dtg,
    param_grid=parameters,
    scoring='accuracy',
    cv=10,
)
model_dectree.fit(X_train, y_train)

In [None]:
# Decision tree refit with the best grid-search parameters
print('Best Estimator:',model_dectree.best_estimator_)

In [None]:
# Parameter combination selected by the grid search
print('Best Parameter:',model_dectree.best_params_)

In [None]:
# Cross-validated accuracy of the best parameter combination
print('Best Score:',model_dectree.best_score_)

In [None]:
# Predict labels for the test features with the grid-searched decision tree
y_predict_dtree = model_dectree.predict(X_test)

In [None]:
# Accuracy of the decision-tree predictions on the test set
print('DT GS Test Accuracy:', accuracy_score(y_true=y_test, y_pred=y_predict_dtree))

In [None]:
# Classification report (precision, recall, ...) for the decision-tree predictions
print(classification_report(y_true=y_test, y_pred=y_predict_dtree))

In [None]:
# Confusion matrix of true vs. predicted test labels (decision tree)
confusion_matrix(y_true=y_test, y_pred=y_predict_dtree)

# Random Forest with GridSearchCV

In [None]:
# Grid-search a 23-tree random forest over split criterion, min_samples_split and
# min_samples_leaf, scored by accuracy with 10-fold cross-validation
from sklearn.ensemble import RandomForestClassifier

classifier_rfg = RandomForestClassifier(n_estimators=23, random_state=33)
parameters = [
    {
        'min_samples_split': [2, 3, 4, 5],
        'criterion': ['gini', 'entropy'],
        'min_samples_leaf': [1, 2, 3],
    }
]

model_gridrf = GridSearchCV(
    estimator=classifier_rfg,
    param_grid=parameters,
    scoring='accuracy',
    cv=10,
)
model_gridrf.fit(X_train, y_train)

In [None]:
# Random forest refit with the best grid-search parameters
print('Best Estimators:',model_gridrf.best_estimator_)

In [None]:
# Parameter combination selected by the grid search
print('Best Parameters:',model_gridrf.best_params_)

In [None]:
# Cross-validated accuracy of the best parameter combination
print('Best Score:',model_gridrf.best_score_)

In [None]:
# Predict labels for the test features with the grid-searched random forest
y_predict_rf = model_gridrf.predict(X_test)

In [None]:
# Accuracy of the random-forest predictions on the test set
print('RF Test Accuracy:', accuracy_score(y_true=y_test, y_pred=y_predict_rf))

In [None]:
# Classification report (precision, recall, ...) for the random-forest predictions
print(classification_report(y_true=y_test, y_pred=y_predict_rf))

In [None]:
# Confusion matrix of true vs. predicted test labels (random forest)
confusion_matrix(y_true=y_test, y_pred=y_predict_rf)

# Naive Bayes

In [None]:
# Train a Gaussian naive Bayes classifier on the training split.
# BernoulliNB binarizes every feature at its default threshold (binarize=0.0); the
# features used here (cache size, cores, threads, clock speed) are presumably all
# positive, so each one would collapse to 1 and carry no signal. GaussianNB models
# continuous features directly.
# The BernoulliNB import is retained in case other cells still reference the name.
from sklearn.naive_bayes import BernoulliNB, GaussianNB
model_nb = GaussianNB()
model_nb.fit(X_train,y_train)

In [None]:
# Predict labels for the test features with the fitted naive Bayes model
y_predict_nb = model_nb.predict(X_test)

In [None]:
# Accuracy of the naive Bayes predictions on the test set
print('NB Test Accuracy:', accuracy_score(y_true=y_test, y_pred=y_predict_nb))

In [None]:
# Classification report (precision, recall, ...) for the naive Bayes predictions
print(classification_report(y_true=y_test, y_pred=y_predict_nb))

In [None]:
# Confusion matrix of true vs. predicted test labels (naive Bayes)
confusion_matrix(y_true=y_test, y_pred=y_predict_nb)

# K-Nearest Neighbors

In [None]:
# Fit a k-nearest-neighbours classifier (Euclidean distance) on the training split
from sklearn.neighbors import KNeighborsClassifier
# NOTE(review): n_neighbors is 6 here, but the inline note says the best accuracy
# was observed at n=10 — confirm which value is intended
model_knn = KNeighborsClassifier(n_neighbors=6,metric='euclidean') # Maximum accuracy for n=10
model_knn.fit(X_train,y_train)

In [None]:
# Predict labels for the test features with the fitted KNN model
y_predict_knn = model_knn.predict(X_test)

In [None]:
# Accuracy of the KNN predictions on the test set.
# The label carries the model name, as the accuracy printouts of the other models do,
# so the printed results can be told apart.
print('KNN Test Accuracy:', accuracy_score(y_test,y_predict_knn))

In [None]:
# Classification report (precision, recall, ...) for the KNN predictions
print(classification_report(y_true=y_test, y_pred=y_predict_knn))

In [None]:
# Confusion matrix of true vs. predicted test labels (KNN)
confusion_matrix(y_true=y_test, y_pred=y_predict_knn)

# XGBoost Classifier

In [None]:
# Fit a gradient-boosted tree classifier with explicitly chosen hyperparameters
from xgboost import XGBClassifier
model_xgb = XGBClassifier(
    n_estimators=100,
    max_depth=5,
    learning_rate=0.1,
    gamma=0,
    # row / column subsampling
    subsample=0.8,
    colsample_bytree=0.5,
    colsample_bynode=0.6,
    # L1 / L2 regularisation
    reg_alpha=0,
    reg_lambda=1,
)
model_xgb.fit(X_train, y_train)

In [None]:
# Predict labels for the test features with the fitted XGBoost model
y_predict_xgb = model_xgb.predict(X_test)

In [None]:
# Accuracy of the XGBoost predictions on the test set
print('XGB Test Accuracy:', accuracy_score(y_true=y_test, y_pred=y_predict_xgb))

In [None]:
# Classification report (precision, recall, ...) for the XGBoost predictions
print(classification_report(y_true=y_test, y_pred=y_predict_xgb))

In [None]:
# Confusion matrix of true vs. predicted test labels (XGBoost)
confusion_matrix(y_true=y_test, y_pred=y_predict_xgb)

# Artificial Neural Network

In [None]:
# Training the model
from sklearn.neural_network import MLPClassifier
model_mlp = MLPClassifier(hidden_layer_sizes=(100,100,100),batch_size=10,learning_rate_init=0.01,max_iter=2000,random_state=10)
model_mlp.fit(X_train,y_train)

In [None]:
# Predict labels for the test features with the fitted MLP
y_predict_mlp = model_mlp.predict(X_test)

In [None]:
# Accuracy of the MLP predictions on the test set
print('ANN Test Accuracy:', accuracy_score(y_true=y_test, y_pred=y_predict_mlp))

In [None]:
# Classification report (precision, recall, ...) for the MLP predictions
print(classification_report(y_true=y_test, y_pred=y_predict_mlp))

In [None]:
# Confusion matrix of true vs. predicted test labels (MLP)
confusion_matrix(y_true=y_test, y_pred=y_predict_mlp)