In [1]:
%matplotlib inline               
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
df = pd.read_csv('india data PM10 classified.csv')

In [3]:
df.head()

Unnamed: 0,PM 10,Max Temp,Avg Dew Point,Max Wind Speed,Avg Humidity,Max Humidity,Max Pressure,Min Pressure,Precipitation
0,0,96,70,9,58.5,83,29.76,29.65,0.08
1,0,108,61,15,31.0,53,29.62,29.47,0.0
2,0,109,69,7,34.0,49,29.6,29.45,0.0
3,1,58,50,6,81.5,97,30.11,29.96,0.0
4,1,71,61,14,84.0,100,29.97,29.83,0.28


In [4]:
df.describe()

Unnamed: 0,PM 10,Max Temp,Avg Dew Point,Max Wind Speed,Avg Humidity,Max Humidity,Max Pressure,Min Pressure,Precipitation
count,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0
mean,1.459677,90.137097,65.879032,9.403226,59.28629,81.306452,29.81871,29.693548,0.034355
std,0.653848,11.625976,9.57442,5.069428,13.767763,14.891335,0.216516,0.217841,0.165131
min,0.0,58.0,45.0,0.0,20.0,29.0,29.42,29.31,0.0
25%,1.0,82.0,59.75,6.0,51.5,74.0,29.62,29.4875,0.0
50%,1.0,93.5,66.0,9.0,60.75,86.0,29.825,29.685,0.0
75%,2.0,97.25,75.0,12.25,69.0,93.0,30.01,29.89,0.0
max,3.0,113.0,80.0,34.0,91.5,100.0,30.22,30.12,1.61


In [5]:
df.shape

(248, 9)

In [6]:
X = df.drop('PM 10',axis=1)
y = np.asarray(df['PM 10'], dtype="|S6")

In [7]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

In [8]:
X_train.shape

(198, 8)

In [9]:
X_test.shape

(50, 8)

In [10]:
y_train.shape

(198,)

In [11]:
y_test.shape

(50,)

In [12]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

In [13]:
scaler.fit(X_train)

StandardScaler(copy=True, with_mean=True, with_std=True)

In [14]:
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [15]:
from sklearn.neural_network import MLPClassifier

mlp = MLPClassifier(max_iter=2000)

parameter_space = {
    'hidden_layer_sizes': [(2, 2, 2), (4, 4, 4), (6, 6, 6), (8, 8, 8), (10, 10, 10), (12, 12, 12)],
    'activation': ['identity', 'logistic', 'tanh', 'relu'],
    'solver': ['lbfgs', 'sgd', 'adam'],
    'alpha': [0.0001, 0.05],
    'learning_rate': ['constant','adaptive'],
}

In [16]:
from sklearn.model_selection import GridSearchCV

clf = GridSearchCV(mlp, parameter_space, n_jobs=-1, cv=3)
clf.fit(X_train, y_train)

GridSearchCV(cv=3, error_score='raise',
       estimator=MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=2000, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False),
       fit_params=None, iid=True, n_jobs=-1,
       param_grid={'hidden_layer_sizes': [(2, 2, 2), (4, 4, 4), (6, 6, 6), (8, 8, 8), (10, 10, 10), (12, 12, 12)], 'activation': ['identity', 'logistic', 'tanh', 'relu'], 'solver': ['lbfgs', 'sgd', 'adam'], 'alpha': [0.0001, 0.05], 'learning_rate': ['constant', 'adaptive']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [17]:
# Best parameter set
print('Best parameters found:\n', clf.best_params_)

# All results
means = clf.cv_results_['mean_test_score']
stds = clf.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, clf.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))

Best parameters found:
 {'activation': 'relu', 'alpha': 0.0001, 'hidden_layer_sizes': (12, 12, 12), 'learning_rate': 'constant', 'solver': 'lbfgs'}
0.566 (+/-0.024) for {'activation': 'identity', 'alpha': 0.0001, 'hidden_layer_sizes': (2, 2, 2), 'learning_rate': 'constant', 'solver': 'lbfgs'}
0.551 (+/-0.087) for {'activation': 'identity', 'alpha': 0.0001, 'hidden_layer_sizes': (2, 2, 2), 'learning_rate': 'constant', 'solver': 'sgd'}
0.561 (+/-0.058) for {'activation': 'identity', 'alpha': 0.0001, 'hidden_layer_sizes': (2, 2, 2), 'learning_rate': 'constant', 'solver': 'adam'}
0.566 (+/-0.024) for {'activation': 'identity', 'alpha': 0.0001, 'hidden_layer_sizes': (2, 2, 2), 'learning_rate': 'adaptive', 'solver': 'lbfgs'}
0.510 (+/-0.019) for {'activation': 'identity', 'alpha': 0.0001, 'hidden_layer_sizes': (2, 2, 2), 'learning_rate': 'adaptive', 'solver': 'sgd'}
0.561 (+/-0.039) for {'activation': 'identity', 'alpha': 0.0001, 'hidden_layer_sizes': (2, 2, 2), 'learning_rate': 'adaptive', 

In [18]:
y_true, y_pred = y_test , clf.predict(X_test)

from sklearn.metrics import classification_report
print('Results on the test set:')
print(classification_report(y_true, y_pred))

Results on the test set:
             precision    recall  f1-score   support

       b'1'       1.00      1.00      1.00        27
       b'2'       1.00      0.80      0.89        20
       b'3'       0.43      1.00      0.60         3

avg / total       0.97      0.92      0.93        50

