# <center>Pulsar Classification</center>

# Importing data & Libraries

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import warnings

%matplotlib inline
# Silence every warning category from here on. This keeps cell output clean,
# but it also hides deprecation notices emitted by the libraries used below.
warnings.filterwarnings('ignore')

In [None]:
# Path to the pulsar CSV, relative to the notebook's working directory.
csv_path = '../input/pulsar-classification-for-class-prediction/Pulsar.csv'
data = pd.read_csv(csv_path)

# EDA

In [None]:
# Preview the first few rows of the loaded table.
data.head()

In [None]:
# Print the frame summary (columns, dtypes, non-null counts).
data.info()

In [None]:
# Per-class mean of every column, one row per value of `Class`.
per_class = data.groupby('Class')
per_class.mean()

In [None]:
# Bar chart of how many rows fall into each target class.
# The column is passed by keyword: recent seaborn releases treat the first
# positional argument as `data`, so a bare Series is no longer read as `x`.
sns.countplot(x='Class', data=data)

In [None]:
# Correlation of each feature with `Class`, in ascending order, shown as a
# one-column frame. The target's own entry is dropped after sorting.
class_corr = data.corr()['Class'].sort_values().drop('Class')
class_corr.to_frame()

In [None]:
# Heatmap of feature-vs-target correlations (ascending, target row removed).
plt.figure(figsize=(5, 5))
target_corr = data.corr()['Class'].sort_values().drop('Class').to_frame()
sns.heatmap(target_corr, annot=True, cmap='CMRmap')

In [None]:
# Pairwise scatter/density grid over all columns, coloured by target class.
sns.pairplot(data=data, hue='Class')

In [None]:
# One histogram (with KDE overlay) per feature, split by target class.
# The grid keeps three columns and at least three rows, and grows extra rows
# when there are more than nine features so plt.subplot never receives an
# out-of-range position.
import math

feature_columns = data.drop('Class', axis=1).columns
n_cols = 3
n_rows = max(3, math.ceil(len(feature_columns) / n_cols))

plt.figure(figsize=(18, 12))
for position, column in enumerate(feature_columns, start=1):
    plt.subplot(n_rows, n_cols, position)
    sns.histplot(data=data, x=column, hue='Class', kde=True)

# Feature Selection

In [None]:
# Target vector and feature matrix (every column except the target).
Y = data['Class']
X = data.drop(columns='Class')

# Engineered interaction feature: product of the EK and Skewness columns.
X['nf'] = X['EK'].mul(X['Skewness'])

# Feature Scaling

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise every feature to zero mean and unit variance.
# NOTE(review): the scaler is fitted on all rows of X at this point.
sc = StandardScaler()
sc.fit(X)
X = sc.transform(X)

# Train-Test Split

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Hold out 20% of the rows for evaluation; the seed is fixed for reproducibility.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=100
)

# X was standardised with statistics computed over all rows, so the held-out
# rows influenced the scaling. Standardise again using the training rows only.
# Standardisation is an affine map, so this gives exactly the values a scaler
# fitted on the unscaled training rows would produce, and the test set no
# longer contributes to the feature scaling.
train_scaler = StandardScaler().fit(X_train)
X_train = train_scaler.transform(X_train)
X_test = train_scaler.transform(X_test)

# Model selection and Evaluation

In [None]:
# Metrics used by the evaluation cells further down.
from sklearn.metrics import accuracy_score, classification_report

# Gradient-boosted trees. `xgb` names the estimator instance (not the xgboost
# module), because the later cells fit and score it under that name.
from xgboost import XGBClassifier
xgb = XGBClassifier()

# Logistic regression with library defaults.
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression()

# Random forest with library defaults.
from sklearn.ensemble import RandomForestClassifier
rfc = RandomForestClassifier()

# KNN: choose n_neighbors by cross-validated accuracy on the training data.
# The test set is deliberately not used here, because it is reused later to
# report final accuracy and tuning on it would bias that figure upwards.
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier

accuracy = []  # [mean CV accuracy, k] for every candidate k
for k in range(1, 40):
    cv_scores = cross_val_score(
        KNeighborsClassifier(n_neighbors=k),
        X_train,
        Y_train,
        cv=5,
        scoring='accuracy',
    )
    accuracy.append([cv_scores.mean(), k])

# max() returns the first maximal entry, so ties resolve to the smallest k.
best_cv_accuracy, best_k = max(accuracy, key=lambda pair: pair[0])
knn = KNeighborsClassifier(n_neighbors=best_k)

# Support vector machines: one with library defaults, one tuned by grid search.
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

svc = SVC()

# RBF-kernel search space over the regularisation strength C and kernel width gamma.
param_grid = {
    'C': [0.1, 1, 10, 100, 1000, 2000],
    'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
    'kernel': ['rbf'],
}
# refit=True retrains the best configuration so `grid` can predict directly.
grid = GridSearchCV(SVC(), param_grid, refit=True, verbose=3)

print('Models Imported')

In [None]:
# Fit every candidate on the training split and record its test-set accuracy.
models = [xgb, lr, rfc, knn, svc, grid]
model_acc = []
for estimator in models:
    estimator.fit(X_train, Y_train)
    test_predictions = estimator.predict(X_test)
    model_acc.append(accuracy_score(Y_test, test_predictions))

# Pair each fitted estimator object with its score for ranking in the next cell.
models = pd.DataFrame({'Models': models, 'Accuracy': model_acc})

In [None]:
# Rank candidates from most to least accurate and renumber the rows from 0.
models = models.sort_values(by='Accuracy', ascending=False).reset_index(drop=True)

# Keep the top fitted estimator before the column is turned into display names.
best = models.loc[0, 'Models']

# Display name = the estimator's text form up to the first "(".
estimator_text = models['Models'].astype(str)
models['Models'] = estimator_text.str.split("(", n=2).str[0]
models

In [None]:
# Summarise the top-ranked model and show its per-class metrics on the test split.
top_name = models['Models'][0]
top_accuracy_pct = round(models['Accuracy'][0] * 100, 2)
print('Hence the best model is', top_name, 'with an accuracy of', top_accuracy_pct, '%')

print('\nThe classification report is:')
best_predictions = best.predict(X_test)
print(classification_report(Y_test, best_predictions))