<a href="https://colab.research.google.com/github/ridvanyigit/My-Projects/blob/main/Social_Network_Ads.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [228]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

In [229]:
# Load the ads dataset into a DataFrame.
# The path is relative, so the CSV must sit in the notebook's working directory.
df = pd.read_csv('Social_Network_Ads.csv')

# Preview the first 5 rows to check the columns were parsed as expected
df.head()

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0


In [230]:
# Count missing entries per column (a column of zeros means nothing needs imputing)
df.isna().sum()

Unnamed: 0,0
User ID,0
Gender,0
Age,0
EstimatedSalary,0
Purchased,0


In [231]:
# Summarize the frame: row count, per-column non-null counts, dtypes and memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   User ID          400 non-null    int64 
 1   Gender           400 non-null    object
 2   Age              400 non-null    int64 
 3   EstimatedSalary  400 non-null    int64 
 4   Purchased        400 non-null    int64 
dtypes: int64(4), object(1)
memory usage: 15.8+ KB


In [232]:
# Descriptive statistics for the numeric columns, transposed so each
# column of the dataset becomes one row of the summary table
df.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
User ID,400.0,15691540.0,71658.321581,15566689.0,15626763.75,15694341.5,15750363.0,15815236.0
Age,400.0,37.655,10.482877,18.0,29.75,37.0,46.0,60.0
EstimatedSalary,400.0,69742.5,34096.960282,15000.0,43000.0,70000.0,88000.0,150000.0
Purchased,400.0,0.3575,0.479864,0.0,0.0,0.0,1.0,1.0


In [233]:
# Drop the 'User ID' column: it is an identifier, not a predictive feature.
# The frame is rebound instead of mutated in place, and errors='ignore' makes
# the cell safe to re-run (a second run is a no-op rather than a KeyError
# once the column is already gone).
df = df.drop(columns=['User ID'], errors='ignore')

In [234]:
# Show a single row to confirm that 'User ID' no longer appears among the columns
df.head(1)

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,Male,19,19000,0


In [235]:
# Replace the textual 'Gender' labels with integer codes.
# In the output shown below the cell, Male maps to 1 and Female to 0.
le = LabelEncoder()
gender_codes = le.fit_transform(df['Gender'])
df['Gender'] = gender_codes

In [236]:
# Preview the first 5 rows to verify that 'Gender' now holds integer codes
df.head()

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,1,19,19000,0
1,1,35,20000,0
2,0,26,43000,0
3,0,27,57000,0
4,1,19,76000,0


In [237]:
# Separate the predictors from the label as NumPy arrays.
# Selection is positional: every column except the last is a feature
# (Gender, Age, EstimatedSalary) and the last column is the target (Purchased).
X = df.iloc[:, :-1].to_numpy()
y = df.iloc[:, -1].to_numpy()

In [238]:
# Hold out 33% of the rows as a test set; the fixed random_state keeps the
# split identical from run to run
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [239]:
# Inspect the unscaled training features (columns: Gender, Age, EstimatedSalary).
# NOTE(review): this prints the whole array; a slice such as X_train[:5] would keep the output short.
print(X_train)

[[     0     58  95000]
 [     1     25  22000]
 [     1     41  59000]
 [     1     28  89000]
 [     0     42  80000]
 [     0     42 108000]
 [     0     46  96000]
 [     0     47 113000]
 [     0     33  28000]
 [     1     19  25000]
 [     1     49  89000]
 [     0     31  15000]
 [     0     30  79000]
 [     1     48 141000]
 [     0     32 117000]
 [     0     37  71000]
 [     0     18  86000]
 [     0     42  79000]
 [     0     27  84000]
 [     1     40  65000]
 [     1     57  74000]
 [     0     26  15000]
 [     0     26  80000]
 [     1     29  43000]
 [     0     33 149000]
 [     1     39  42000]
 [     0     54 104000]
 [     1     36  33000]
 [     0     46  32000]
 [     0     40 142000]
 [     0     37  62000]
 [     1     29 148000]
 [     0     37  57000]
 [     1     35  50000]
 [     0     42  53000]
 [     1     35  38000]
 [     0     41  30000]
 [     0     40  72000]
 [     1     26  15000]
 [     0     31  68000]
 [     1     35  53000]
 [     0     35 

In [240]:
# Inspect the training labels (0 = not purchased, 1 = purchased)
print(y_train)

[1 0 0 0 1 1 0 1 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 1 0 1 1 0 1 0 0 0 0 0
 0 0 0 0 0 0 0 1 0 0 1 0 0 0 1 0 0 1 1 0 0 0 1 1 0 1 1 0 1 1 1 0 0 1 1 1 0
 0 0 0 0 1 0 0 0 1 0 0 1 1 1 1 0 1 0 0 0 0 1 0 1 0 0 0 1 0 0 0 1 1 1 1 0 0
 0 0 0 1 0 1 1 0 0 0 1 0 1 0 1 1 0 1 0 0 0 0 0 1 1 1 0 0 1 0 0 0 0 0 1 0 1
 0 0 1 0 1 0 0 0 0 1 0 0 0 0 0 1 1 0 1 0 0 0 1 0 0 0 0 0 0 1 0 0 0 1 1 0 1
 1 0 0 0 0 1 0 1 1 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 1 0 1 0
 1 1 0 0 1 0 1 0 0 0 0 1 0 0 0 1 1 1 1 1 1 1 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0
 0 1 1 0 0 0 0 0 0]


In [241]:
# Inspect the unscaled test features (columns: Gender, Age, EstimatedSalary).
# NOTE(review): this prints the whole array; a slice such as X_test[:5] would keep the output short.
print(X_test)

[[     0     46  22000]
 [     0     59  88000]
 [     0     28  44000]
 [     0     48  96000]
 [     0     29  28000]
 [     0     30  62000]
 [     0     47 107000]
 [     0     29  83000]
 [     1     40  75000]
 [     1     42  65000]
 [     0     35  65000]
 [     0     53  34000]
 [     0     23  48000]
 [     0     20  23000]
 [     1     30  87000]
 [     1     35 108000]
 [     0     52  38000]
 [     0     46  74000]
 [     1     39  42000]
 [     1     56  60000]
 [     0     22  27000]
 [     1     29  80000]
 [     1     47  23000]
 [     0     59  76000]
 [     1     19  19000]
 [     1     51  23000]
 [     1     42  80000]
 [     1     37  53000]
 [     0     55 125000]
 [     0     19  21000]
 [     0     46  41000]
 [     1     19  70000]
 [     1     36 144000]
 [     1     28  79000]
 [     1     40 107000]
 [     1     35  75000]
 [     1     37  55000]
 [     0     38  65000]
 [     1     26  30000]
 [     0     18  68000]
 [     1     48  33000]
 [     0     24 

In [242]:
# Inspect the test labels (0 = not purchased, 1 = purchased)
print(y_test)

[0 1 0 1 0 0 1 0 0 0 0 1 0 0 0 0 1 0 0 1 0 0 1 1 0 1 0 0 1 0 1 0 1 0 1 0 0
 0 0 0 1 0 0 1 0 1 0 0 1 0 0 1 0 0 0 0 1 0 0 0 0 0 1 0 0 1 1 1 0 0 1 0 0 0
 1 0 1 1 0 1 0 1 1 0 1 1 0 1 0 0 1 0 0 0 0 0 1 0 1 1 1 0 1 0 0 1 1 1 1 0 0
 0 1 1 1 0 0 1 0 0 0 0 1 1 1 0 0 0 0 1 1 0]


In [243]:
# Standardize the features. The scaler learns its statistics from the training
# split only and the same fitted scaler is then applied to the test split, so
# no information from the test set influences the scaling.
# NOTE: X_train / X_test are overwritten here, so re-running this cell without
# re-running the split cell first would re-fit the scaler on already-scaled data.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [244]:
# Train a Support Vector Machine classifier with the RBF kernel on the scaled
# training data.
# NOTE(review): per the scikit-learn docs, random_state only matters for SVC
# when probability estimates are enabled - confirm it is needed here.
classifier = SVC(kernel='rbf', random_state=0)
classifier.fit(X_train, y_train)

In [245]:
# Score one hand-written example: [Male (1), Age 30, EstimatedSalary 87000].
# The raw values must go through the same fitted scaler as the training data
# before being passed to the classifier.
# Printed result is 0 (not purchased) or 1 (purchased).
sample = [[1, 30, 87000]]
sample_scaled = sc.transform(sample)
print(classifier.predict(sample_scaled))

[0]


In [246]:
# Predict a label for every row of the (scaled) test set
y_pred = classifier.predict(X_test)

# Print a two-column array: predicted label on the left, actual label on the right
print(np.column_stack((y_pred, y_test)))

[[1 0]
 [1 1]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [1 0]
 [1 1]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [1 1]
 [1 1]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [1 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [1 1]
 [0 1]
 [1 1]
 [0 0]
 [1 0]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [1 1]
 [0 0]
 [0 1]
 [0 0]
 [1 1]
 [1 1]
 [1 0]
 [1 1]
 [1 1]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [1 1]
 [1 1]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [1 1]
 [1 1]
 [0 0]
 [0 0]
 [0 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [1 1]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [1 1]
 [0 0]]


In [247]:
# Compute both evaluation metrics on the held-out test set first.
# scikit-learn's confusion matrix has actual classes as rows and predicted
# classes as columns; accuracy is the fraction of test rows predicted correctly.
cm = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

# Report the results
print("Confusion Matrix:\n", cm)
print("Accuracy:", accuracy)

Confusion Matrix:
 [[75  5]
 [ 3 49]]
Accuracy: 0.9393939393939394
