# **Web Mining Course Presentation - Fall 2021**
Mehrdad Mohammadian

GitHub repo: https://github.com/mehrdad-dev/webmining-course-fall2021


tips❗

Download this Jupyter notebook and upload it to Google Colab,
or copy the link to this notebook on GitHub and paste it into Colab's `Open notebook > GitHub` tab.


# **Maximal Clique - Clustering**

NetworkX:  https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.clique.find_cliques.html#networkx.algorithms.clique.find_cliques

In [31]:
from networkx.algorithms import clique
import networkx as nx

In [32]:
# Three nodes with every pair connected, i.e. a triangle.
edges_fig_4 = [
    ('p1', 'p2'),
    ('p1', 'p3'),
    ('p3', 'p2'),
]

# Start from an empty undirected graph and load the edge list into it.
G = nx.Graph()
G.add_edges_from(edges_fig_4)

In [33]:
# find_cliques yields each maximal clique of G as a list of node labels.
cliques = clique.find_cliques(G)

# Number the cliques from 1 for display.
for number, members in enumerate(cliques, start=1):
    print(f'Maximal Clique {number} ', members)

Maximal Clique 1  ['p1', 'p2', 'p3']


# **KNN - Classification**

In [34]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics

## **Dataset**

In [35]:
# Load scikit-learn's bundled wine dataset, then pull out features and labels.
wine = datasets.load_wine()
data = wine.data        # feature values, one row per sample
targets = wine.target   # integer class label per sample

In [36]:
# Show the names of the feature columns in the wine dataset.
wine.feature_names

['alcohol',
 'malic_acid',
 'ash',
 'alcalinity_of_ash',
 'magnesium',
 'total_phenols',
 'flavanoids',
 'nonflavanoid_phenols',
 'proanthocyanins',
 'color_intensity',
 'hue',
 'od280/od315_of_diluted_wines',
 'proline']

In [37]:
# Show the names of the target classes in the wine dataset.
wine.target_names

array(['class_0', 'class_1', 'class_2'], dtype='<U7')

In [38]:
# Display the full label vector (one integer class id per sample).
targets

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2])

In [39]:
import pandas as pd

# Wrap the raw feature array in a DataFrame for a tabular preview;
# columns keep their default integer labels.
df = pd.DataFrame(data=data)

# Preview the first ten samples.
df.head(n=10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0
1,13.2,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050.0
2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185.0
3,14.37,1.95,2.5,16.8,113.0,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480.0
4,13.24,2.59,2.87,21.0,118.0,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735.0
5,14.2,1.76,2.45,15.2,112.0,3.27,3.39,0.34,1.97,6.75,1.05,2.85,1450.0
6,14.39,1.87,2.45,14.6,96.0,2.5,2.52,0.3,1.98,5.25,1.02,3.58,1290.0
7,14.06,2.15,2.61,17.6,121.0,2.6,2.51,0.31,1.25,5.05,1.06,3.58,1295.0
8,14.83,1.64,2.17,14.0,97.0,2.8,2.98,0.29,1.98,5.2,1.08,2.85,1045.0
9,13.86,1.35,2.27,16.0,98.0,2.98,3.15,0.22,1.85,7.22,1.01,3.55,1045.0


## **Model Training**

In [40]:
# Hold out 30% of the samples for testing; the fixed seed makes the
# shuffled split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    targets,
    test_size=0.3,
    shuffle=True,
    random_state=42,
)

In [41]:
# k-nearest-neighbours classifier that votes among the 7 closest training samples.
# NOTE(review): no feature scaling is applied before fitting in this notebook;
# KNN is distance-based, so standardising the features may be worth trying.
knn_model = KNeighborsClassifier(n_neighbors=7)

# fit returns the estimator itself, so the cell displays the fitted model.
knn_model.fit(X=X_train, y=y_train)

KNeighborsClassifier(n_neighbors=7)

## **Prediction**

In [42]:
# Predict a class label for every sample in the held-out test split.
prediction = knn_model.predict(X_test)

In [43]:
# Display the predicted labels for the test split.
prediction

array([0, 0, 2, 0, 1, 0, 1, 2, 2, 0, 2, 2, 0, 1, 0, 1, 1, 2, 0, 1, 0, 1,
       2, 1, 1, 1, 1, 2, 1, 0, 0, 1, 2, 0, 0, 0, 2, 2, 2, 1, 0, 1, 1, 0,
       2, 0, 2, 1, 2, 0, 1, 0, 0, 2])

In [44]:
# Display the true labels of the test split for comparison with the predictions.
y_test

array([0, 0, 2, 0, 1, 0, 1, 2, 1, 2, 0, 2, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1,
       1, 2, 2, 2, 1, 1, 1, 0, 0, 1, 2, 0, 0, 0, 2, 2, 1, 2, 0, 1, 1, 1,
       2, 0, 1, 1, 2, 0, 1, 0, 0, 2])

In [45]:
# Fraction of test samples whose predicted label equals the true label.
print(
    "acc:",
    metrics.accuracy_score(y_true=y_test, y_pred=prediction),
)

acc: 0.7592592592592593


## **GridSearch Example**

In [46]:
from sklearn.model_selection import GridSearchCV

# Candidate neighbourhood sizes 1..10, each scored by 10-fold
# cross-validated accuracy on the training split.
parameters = {'n_neighbors': list(range(1, 11))}
grid = GridSearchCV(
    estimator=knn_model,
    param_grid=parameters,
    scoring='accuracy',
    cv=10,
    verbose=1,
)

# fit returns the search object itself, so the cell displays it.
grid.fit(X_train, y_train)

Fitting 10 folds for each of 10 candidates, totalling 100 fits


GridSearchCV(cv=10, estimator=KNeighborsClassifier(n_neighbors=7),
             param_grid={'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]},
             scoring='accuracy', verbose=1)

In [47]:
# Report the best mean cross-validated score, the parameters that achieved it,
# and the estimator configured with those parameters, one per line.
print(
    grid.best_score_,
    grid.best_params_,
    grid.best_estimator_,
    sep='\n',
)

0.7435897435897435
{'n_neighbors': 1}
KNeighborsClassifier(n_neighbors=1)
