## Association Rules

## Movies DataSet
### Prepare association rules, then:
1. Try different values of support and confidence. Observe how the number of rules changes for different support and confidence values.
2. Change the minimum length in the apriori algorithm.
3. Visualize the obtained rules using different plots.

## 1. Import Necessary Libraries

In [None]:
# Data handling and plotting libraries used throughout the notebook.
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Silence every warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings raised by the libraries.
import warnings
warnings.filterwarnings('ignore')

# Frequent-itemset mining and rule generation.
from mlxtend.frequent_patterns import apriori, association_rules

## 2. Import Data

In [None]:
# Load the raw movie data from the CSV expected in the working directory,
# then show it as the cell output.
movies_data = pd.read_csv(filepath_or_buffer='my_movies.csv')
movies_data

## 3. Data Pre-Processing

In [None]:
# Remove the V1..V5 columns in place; only the remaining columns are used
# for the analysis that follows.
movies_data.drop(columns=['V1', 'V2', 'V3', 'V4', 'V5'], inplace=True)

In [None]:
# Display the frame as the cell output.
movies_data

## 4. Initial Analysis

In [None]:
# (rows, columns) of the frame.
movies_data.shape

In [None]:
# Number of missing values in each column.
movies_data.isna().sum()

In [None]:
# Data type of each column.
movies_data.dtypes

In [None]:
# Per-column totals; the same values are plotted in the bar chart below.
movies_data.sum()

In [None]:
# Bar chart of the per-column totals, one bar per column of movies_data.
plt.figure(figsize=(10, 6))
plt.bar(x=movies_data.columns, height=movies_data.sum())
plt.xlabel(xlabel='Movies')
plt.title(label='Movie Counts')
plt.show()

## 5. Model Building

#### i. 5% Support, 80% Confidence and Lift >= 1

In [None]:
# Mine frequent itemsets with a minimum support of 5% and no cap on itemset
# size (max_len=None). use_colnames=True reports items by column name
# instead of column index.
# mlxtend expects a boolean one-hot frame; non-bool input only triggers a
# deprecation warning (silenced in this notebook), so cast explicitly.
# NOTE(review): assumes movies_data holds only 0/1 flags -- confirm.
freq_movies_1 = apriori(
    df=movies_data.astype(bool),
    min_support=0.05,
    use_colnames=True,
    max_len=None,
)
freq_movies_1

In [None]:
# Derive rules from the frequent itemsets, keeping those whose confidence is
# at least 0.8.
best_associate = association_rules(
    df=freq_movies_1,
    metric='confidence',
    min_threshold=0.8,
)
best_associate

In [None]:
# Keep only the rules whose lift is at least 1.
best_associate = best_associate.loc[best_associate['lift'].ge(1)]
best_associate

In [None]:
# Support vs. confidence for every retained rule, coloured by lift.
# Axis and colorbar labels are set so the figure can be read on its own.
plt.figure(figsize=(12, 6))
plt.scatter(
    x=best_associate['support'],
    y=best_associate['confidence'],
    c=best_associate['lift'],
)
plt.xlabel('support')
plt.ylabel('confidence')
plt.title('Support vs. Confidence (colour = lift)')
plt.colorbar(label='lift')
plt.show()

In [None]:
# Reshape the rules into a consequents x antecedents grid of lift values.
# Pairs without a rule are filled with 0.
best_associate_pivot = best_associate.pivot(
    index='consequents',
    columns='antecedents',
    values='lift',
)
best_associate_pivot = best_associate_pivot.fillna(0)

In [None]:
# Heatmap of the lift grid, rounded to 2 decimals and annotated with the
# cell values.
plt.figure(figsize=(18, 8))
sns.heatmap(data=best_associate_pivot.round(2), annot=True)
plt.show()

#### ii. 10% Support, 80% Confidence and Lift >= 1 with Pruning

In [None]:
# Mine frequent itemsets with a minimum support of 10%, limiting itemsets to
# at most 3 items (max_len=3). use_colnames=True reports items by column
# name instead of column index.
# mlxtend expects a boolean one-hot frame; non-bool input only triggers a
# deprecation warning (silenced in this notebook), so cast explicitly.
# NOTE(review): assumes movies_data holds only 0/1 flags -- confirm.
freq_movies_1 = apriori(
    df=movies_data.astype(bool),
    min_support=0.10,
    use_colnames=True,
    max_len=3,
)
freq_movies_1

In [None]:
# Derive rules from the frequent itemsets, keeping those whose confidence is
# at least 0.8.
best_associate = association_rules(
    df=freq_movies_1,
    metric='confidence',
    min_threshold=0.8,
)
best_associate

In [None]:
# Keep only the rules whose lift is at least 1.
best_associate = best_associate.loc[best_associate['lift'].ge(1)]
best_associate

In [None]:
# Support vs. confidence for every retained rule, coloured by lift.
# Axis and colorbar labels are set so the figure can be read on its own.
plt.figure(figsize=(12, 6))
plt.scatter(
    x=best_associate['support'],
    y=best_associate['confidence'],
    c=best_associate['lift'],
)
plt.xlabel('support')
plt.ylabel('confidence')
plt.title('Support vs. Confidence (colour = lift)')
plt.colorbar(label='lift')
plt.show()

In [None]:
# Reshape the rules into a consequents x antecedents grid of lift values.
# Pairs without a rule are filled with 0.
best_associate_pivot = best_associate.pivot(
    index='consequents',
    columns='antecedents',
    values='lift',
)
best_associate_pivot = best_associate_pivot.fillna(0)

In [None]:
# Heatmap of the lift grid, rounded to 2 decimals and annotated with the
# cell values.
plt.figure(figsize=(18, 8))
sns.heatmap(data=best_associate_pivot.round(2), annot=True)
plt.show()

In [None]:
# Select the rules with confidence 1 and lift 10.
# Both metrics are computed floats, so compare within a small tolerance
# instead of with exact equality, which can miss rows through rounding error.
best_associate[
    best_associate['confidence'].sub(1).abs().lt(1e-9)
    & best_associate['lift'].sub(10).abs().lt(1e-9)
]

#### Looking at the association rules, it seems that people who watch Gladiator and Green Mile also watch LOTR.

In [None]:
# Preview the first rows of the frame.
movies_data.head()

In [None]:
# Take the first four columns and replace any missing value with the string
# 'null' so it can be stripped out later.
# NOTE(review): V1..V5 were dropped in place earlier in this notebook, so if
# the cells ran in order these are no longer those columns -- confirm this
# slice selects the intended data.
movies_data_1 = movies_data.iloc[:, :4].fillna('null')
movies_data_1

In [None]:
# Convert the frame to a plain list of row lists.
movies_data_1 = movies_data_1.values.tolist()
movies_data_1

In [None]:
# Strip every 'null' placeholder from each row, in place.
# One filtering pass per row replaces the index-based loop that repeatedly
# scanned for and removed 'null' one occurrence at a time.
for transaction in movies_data_1:
    transaction[:] = [item for item in transaction if item != 'null']

In [None]:
# Display the row lists after the 'null' placeholders were removed.
movies_data_1

In [None]:
from mlxtend.preprocessing import TransactionEncoder

In [None]:
# Encoder used below to turn the list of transactions into a one-hot table.
trans_encoder=TransactionEncoder()

In [None]:
# Learn the distinct items, encode every transaction against them, and wrap
# the result in a frame whose columns are the learned item labels.
movies_data_encoded = pd.DataFrame(
    trans_encoder.fit(movies_data_1).transform(movies_data_1),
    columns=trans_encoder.columns_,
)
movies_data_encoded

In [None]:
# Convert the True/False flags to 1/0 integers.
# astype states the conversion directly instead of relying on replace() to
# map True/False and infer the resulting dtype.
# NOTE(review): assumes every column is boolean, as produced by the encoder
# above -- confirm.
movies_data_encoded = movies_data_encoded.astype(int)
movies_data_encoded

In [None]:
# Display the original frame again for comparison with the encoded table.
movies_data