In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
%matplotlib inline
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [None]:
# Location of the raw CSV (Kaggle input mount); change this to run elsewhere.
DATA_PATH = '/kaggle/input/dataset-of-laptop-users/Laptop-Users.csv'
df = pd.read_csv(DATA_PATH)

In [None]:
# Preview the first rows of the raw data.
df.head()

In [None]:
# Column dtypes, non-null counts and memory usage.
df.info()

In [None]:
# Column names available for plotting and modelling.
df.columns

In [None]:
# Frequency of each value in 'Has Laptop' (used later as the prediction target).
df['Has Laptop'].value_counts()

In [None]:
# Donut chart of laptop ownership.
# value_counts() orders its result by frequency, so the slice names are taken
# from its index instead of a hard-coded ['YES', 'NO'] list; a fixed list would
# silently mislabel the slices whenever 'no' is the more frequent value.
laptop_counts = df['Has Laptop'].value_counts()
label = [str(value).upper() for value in laptop_counts.index]
fig = px.pie(names=label, values=laptop_counts.tolist(), width=900, height=700)
fig.update_traces(textposition='inside',
                  textinfo='percent + label',
                  hole=0.8,
                  marker=dict(colors=['#ff6361', '#1e434c', '#ffa600'],
                              line=dict(color='white', width=2)))

# Caption placed in the middle of the donut hole; the legend is redundant
# because every slice already carries its label.
fig.update_layout(annotations=[dict(text='Laptop',
                                    x=0.5, y=0.5, font_size=38, showarrow=False,
                                    font_family='monospace',
                                    font_color='black')],
                  showlegend=False)

# Gender

In [None]:
# Three side-by-side views of Gender: frequency, Age spread and Income level.
plt.figure(figsize=(17, 9))
sns.set_style('white')
sns.set_context("poster", font_scale=.75)

# (seaborn plotter, plot-specific keyword arguments, y-axis label) per panel.
gender_panels = [
    (sns.countplot, dict(color='coral'), 'Count'),
    (sns.boxplot, dict(y='Age', color='limegreen'), 'Age'),
    (sns.barplot, dict(y='Income', color='cyan'), 'Income'),
]

for position, (plot_fn, plot_kwargs, y_label) in enumerate(gender_panels, start=1):
    plt.subplot(1, 3, position)
    plot_fn(x='Gender', data=df, **plot_kwargs)
    plt.xlabel('Gender')
    plt.ylabel(y_label)
    plt.title(f'Gender Vs {y_label}')
    plt.xticks(rotation='vertical')
    plt.grid()

plt.suptitle('Gender')
plt.tight_layout()
plt.show()

In [None]:
# Filled density curves of Income and Age, one curve per Gender.
# Keyword arguments common to both panels.
kde_style = dict(
    data=df,
    hue='Gender',
    fill=True,
    palette=["#ff6361", "#ffa600"],
    alpha=.8,
    linewidth=0,
)

plt.figure(figsize=(16, 9))
sns.set_context("poster", font_scale=.75)
sns.set_style('darkgrid')

# Left panel: Income.
plt.subplot(1, 2, 1)
sns.kdeplot(x='Income', **kde_style)
plt.title('Gender Vs Income')

# Right panel: Age.
plt.subplot(1, 2, 2)
sns.set_palette('colorblind')
sns.kdeplot(x='Age', **kde_style)
plt.title('Gender Vs Age')

plt.suptitle('Income  And  Age Distribution Vs Gender')
plt.tight_layout()


# Occupation

In [None]:
# Three views of Occupation: Age by Region, Age by Gender, Income distribution.
plt.figure(figsize=(17, 9))
sns.set_style('white')
sns.set_context("poster", font_scale=.85)

# Panel 1: bar height is the aggregated Age per occupation (seaborn's barplot
# estimator), split by Region. It is not a count, so it is labelled as Age.
plt.subplot(1, 3, 1)
sns.barplot(x='Occupation', y='Age', hue='Region', data=df, palette=['#ff6361', 'cyan'])
plt.xlabel('Occupation')
plt.ylabel('Age')
plt.title('Occupation Vs Age (by Region)')
plt.xticks(rotation='vertical')
plt.grid()

# Panel 2: Age per occupation, one line per Gender.
plt.subplot(1, 3, 2)
sns.pointplot(x='Occupation', data=df, y='Age', color='limegreen', hue='Gender')
plt.xlabel('Occupation')
plt.ylabel('Age')
plt.title('Occupation Vs Age')
plt.xticks(rotation='vertical')
plt.grid()

# Panel 3: Income distribution per occupation.
plt.subplot(1, 3, 3)
sns.violinplot(x='Occupation', data=df, y='Income', color='coral')
plt.xlabel('Occupation')
plt.ylabel('Income')
plt.title('Occupation Vs Income')
plt.xticks(rotation='vertical')
plt.grid()

plt.suptitle('Occupation')
plt.tight_layout()
plt.show()

# Region

In [None]:
# Three views of Region: Age spread, Income density and Gender counts.
# Axis labels and titles name the variables that are actually plotted.
plt.figure(figsize=(18, 9.5))
sns.set_style('white')
sns.set_context("poster", font_scale=.85)

# Panel 1: Age distribution per Region.
plt.subplot(1, 3, 1)
sns.boxplot(x='Region', y='Age', data=df, palette=['#ff6361', 'cyan'])
plt.xlabel('Region')
plt.ylabel('Age')
plt.title('Region Vs Age')
plt.xticks(rotation='vertical')
plt.grid()

# Panel 2: Income density, one filled curve per Region.
plt.subplot(1, 3, 2)
sns.kdeplot(x='Income', data=df, color='limegreen', hue='Region', fill=True,
            palette=["#ff6361", "#ffa600"], alpha=0.8)
plt.xlabel('Income')
plt.ylabel('Density')
plt.title('Region Vs Income')
plt.xticks(rotation='vertical')
plt.grid()

# Panel 3: number of rows per Region, split by Gender.
plt.subplot(1, 3, 3)
sns.countplot(x='Region', data=df, hue='Gender', palette=['#58508d', 'limegreen'])
plt.xlabel('Region')
plt.ylabel('Count')
plt.title('Region Vs Gender')
plt.xticks(rotation='vertical')
plt.grid()

plt.suptitle('Region')
plt.tight_layout()
plt.show()

# Income

In [None]:
# Laptop ownership against Income, Occupation and Region.
plt.figure(figsize=(18, 9))
sns.set_style('white')
sns.set_context("poster", font_scale=.85)

# Panel 1: Income density, one filled curve per 'Has Laptop' value.
plt.subplot(1, 3, 1)
sns.kdeplot(x='Income', data=df, hue='Has Laptop', alpha=0.8, fill=True,
            palette=['#1e434c', '#ff6361'])
plt.title('Income Vs Has Laptop')
plt.grid()

# Panel 2: number of rows per Occupation, split by 'Has Laptop'.
# countplot does the counting itself; feeding barplot a hand-made 0..19 list as
# y plotted the average of those arbitrary numbers (and only ran at all when
# the frame had exactly 20 rows), which is not a count.
plt.subplot(1, 3, 2)
sns.countplot(x='Occupation', data=df, hue='Has Laptop', palette=['#ff6361', 'limegreen'])
plt.xlabel('Occupation')
plt.ylabel('Count')
plt.title('Occupation Vs Has Laptop')
plt.grid()

# Panel 3: number of rows per Region, split by 'Has Laptop'.
plt.subplot(1, 3, 3)
sns.set_palette("RdBu",)
sns.countplot(x='Region', data=df, hue='Has Laptop', palette=['cyan', '#58508d'])
plt.xlabel('Region')
plt.title('Region Vs Has Laptop')
plt.grid()

plt.suptitle('Availability Of Laptop')
plt.tight_layout()
plt.show()

# 

# Feature Engineering

In [None]:
# Encode the categorical columns as integers, in place on `df`.
#   Has Laptop : 1 = 'yes',  0 = anything else
#   Gender     : 1 = 'male', 0 = anything else
#   Region     : 1 = 'city', 0 = anything else
#   Occupation : 0 = student, 1 = teacher, 2 = banker, 3 = officer
#                (values missing from the mapping become NaN)
#
# Each column is encoded only while it is still non-numeric. Without that
# guard, running this cell a second time would compare the already-encoded
# integers with the strings again, turning every binary column into 0 and
# every Occupation into NaN.
binary_positive_value = {'Has Laptop': 'yes', 'Gender': 'male', 'Region': 'city'}
Occupy = {'student': 0, 'teacher': 1, 'banker': 2, 'officer': 3}

for column, positive_value in binary_positive_value.items():
    if not pd.api.types.is_numeric_dtype(df[column]):
        # Vectorised equivalent of apply(lambda x: 1 if x == positive_value else 0).
        df[column] = (df[column] == positive_value).astype(int)

if not pd.api.types.is_numeric_dtype(df['Occupation']):
    df['Occupation'] = df['Occupation'].map(Occupy)

In [None]:
# Preview the frame after the integer encoding above.
df.head()

# Import Libraries

In [None]:
# scikit-learn pieces used by the modelling cells below.
# No estimator is instantiated here: the logistic-regression cell builds its
# own `logmodel` right before fitting it.
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing  import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score,confusion_matrix
from sklearn.metrics import classification_report

# Model Building

# 1. Random Forest

In [None]:
# Random Forest on the unscaled features, with a 70/30 train/test split.
X = df.drop('Has Laptop', axis=1)
y = df['Has Laptop']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=101)

# Seed the forest as well as the split; otherwise the bootstrap samples and
# feature subsets change on every run and the reported score is not reproducible.
rfc = RandomForestClassifier(n_estimators=100, random_state=101)
rfc.fit(X_train, y_train)
rfc_pred = rfc.predict(X_test)

# `k` holds the Random Forest accuracy (in percent) for the final comparison chart.
k = accuracy_score(y_test, rfc_pred) * 100
print('Random Forest Accuracy score is: ', k)
print('\n')
print(confusion_matrix(y_test, rfc_pred))
print('\n')
print(classification_report(y_test, rfc_pred))

# 2. KNN  

In [None]:
# KNN baseline (k = 1) on standardised features, with a 50/50 train/test split.
features = df.drop('Has Laptop', axis=1)
y = df['Has Laptop']

# Split BEFORE scaling so the scaler's mean/std come from training rows only.
# Fitting it on every row lets test-set statistics leak into the features.
raw_train, raw_test, y_train, y_test = train_test_split(
    features, y, test_size=0.50, random_state=101)

scale = StandardScaler()
scale.fit(raw_train)


def _scaled_frame(frame):
    """Return `frame` standardised with the train-fitted scaler, keeping its labels."""
    return pd.DataFrame(scale.transform(frame), columns=frame.columns, index=frame.index)


X_train = _scaled_frame(raw_train)
X_test = _scaled_frame(raw_test)

# Full scaled feature frame, kept under the original names because later cells
# re-split `X`. Column names come from the feature frame itself rather than
# df.columns[:-1], which is only right when the target is the last column.
scaled_features = scale.transform(features)
df_feat = pd.DataFrame(scaled_features, columns=features.columns, index=features.index)
X = df_feat

knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(X_train, y_train)
pred = knn.predict(X_test)

print('KNN Accuracy score is: ', accuracy_score(y_test, pred) * 100)
print('\n')
print(confusion_matrix(y_test, pred))
print('\n')
print(classification_report(y_test, pred))

In [None]:
# Test-set misclassification rate for each candidate K from 1 to 9.
plt.figure(figsize=(18, 6))

k_values = range(1, 10)
error = []
for i in k_values:
    knn = KNeighborsClassifier(n_neighbors=i).fit(X_train, y_train)
    pred_i = knn.predict(X_test)
    # Fraction of test rows whose predicted label differs from the true one.
    error.append(np.mean(pred_i != y_test))

sns.set_style('whitegrid')
plt.plot(
    k_values,
    error,
    color='green',
    linestyle='dashed',
    marker='o',
    markerfacecolor='red',
    markersize=10,
)
plt.title('Error Rate vs. K Value')
plt.xlabel('K')
plt.ylabel('Error Rate')

In [None]:
# Refit KNN with K = 6 and report its test-set metrics.
knn = KNeighborsClassifier(n_neighbors=6)
knn.fit(X_train, y_train)
pred = knn.predict(X_test)

# `m` holds the KNN accuracy (in percent) for the final comparison chart.
m = accuracy_score(y_test, pred) * 100

print('\n')
print('Best KNN Accuracy score is: ', m)
print('\n')
print(confusion_matrix(y_test, pred))
print('\n')
print(classification_report(y_test, pred))

# 3. Logistic Regression

In [None]:

# Logistic regression on the current train/test split.
logmodel = LogisticRegression()
logmodel.fit(X_train, y_train)
predictions = logmodel.predict(X_test)

# `p` holds the logistic-regression accuracy (in percent) for the final comparison chart.
p = accuracy_score(y_test, predictions) * 100

print('\n')
print('Logistics Accuracy score is: ', p)
print('\n')
print(confusion_matrix(y_test, predictions))
print('\n')
print(classification_report(y_test, predictions))

In [None]:
# Heat-map of the logistic-regression confusion matrix (rows = actual, columns = predicted).
cm = confusion_matrix(y_test, predictions)

fig, ax = plt.subplots(figsize=(8, 5))
ax.imshow(cm)
ax.grid(False)
ax.xaxis.set(ticks=(0, 1), ticklabels=('Predicted 0s', 'Predicted 1s'))
ax.yaxis.set(ticks=(0, 1), ticklabels=('Actual 0s', 'Actual 1s'))
# Pin the y-limits explicitly so both rows are fully visible, with row 0 on top.
ax.set_ylim(1.5, -0.5)

# Write each cell's count at its centre; x is the column index, y is the row index.
for i, j in np.ndindex(2, 2):
    ax.text(j, i, cm[i, j], ha='center', va='center', color='white')

plt.show()

# 4. Support Vector Machine

In [None]:
# Support vector classifier with default hyper-parameters, on a fresh 70/30 split of X and y.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=101
)

model = SVC().fit(X_train, y_train)
pred = model.predict(X_test)

svc_accuracy = accuracy_score(y_test, pred) * 100
print('Vector Machine Accuracy score is: ', svc_accuracy)
print('\n')
print(confusion_matrix(y_test, pred))
print('\n')
print(classification_report(y_test, pred))

> **Grid Search**
> Choosing the `C` and `gamma` values for the support vector classifier.

In [None]:

# Cross-validated search over C and gamma for an RBF-kernel SVC.
param_grid = {
    'C': [0.1, 1, 10, 100, 1000],
    'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
    'kernel': ['rbf'],
}

# refit=True retrains the best combination on the whole training split, so
# `grid` can be used directly for prediction afterwards.
grid = GridSearchCV(SVC(), param_grid, refit=True, verbose=3)
grid.fit(X_train, y_train)

for search_result in (grid.best_params_, grid.best_estimator_, grid.best_score_):
    print(search_result)

In [None]:
# Evaluate the refitted best SVC from the grid search on the held-out test split.
grid_pred = grid.predict(X_test)

# `g` holds the tuned-SVC accuracy (in percent) for the final comparison chart.
g = accuracy_score(y_test, grid_pred) * 100

print('Best Vector Machine Accuracy score is: ', g)
print('\n')
print(confusion_matrix(y_test, grid_pred))
print('\n')
print(classification_report(y_test, grid_pred))

In [None]:
# Compare the four models by test accuracy (percent).
# A bar chart is used because accuracies are independent scores, not parts of a
# whole: a pie would show each model's share of the summed accuracies (about
# 25% each) rather than the accuracy itself.
label = ['Random Forest', 'K Nearest Neighbours', 'Logistic Regression', 'Support Vector Machine']
accuracies = [k, m, p, g]  # Random Forest, KNN, logistic regression, tuned SVC

fig = px.bar(x=label, y=accuracies, color=label,
             color_discrete_sequence=['#8d230f', '#ff6361', '#ffa600', '#bc5090'],
             title='Performance Comparison',
             width=900, height=700)
# Print the accuracy above each bar.
fig.update_traces(texttemplate='%{y:.2f}%',
                  textposition='outside',
                  marker_line=dict(color='white', width=2))

# Fixed 0-105 y-range keeps the bars comparable and leaves room for the value
# labels; the legend is redundant because the x axis already names each model.
fig.update_layout(xaxis_title='Model',
                  yaxis_title='Accuracy (%)',
                  yaxis_range=[0, 105],
                  title_font_size=28,
                  title_font_family='monospace',
                  title_font_color='black',
                  showlegend=False)
                  

# Thanks