In [1]:
import pandas as pd
import numpy as np
from mlxtend.frequent_patterns import apriori,association_rules


In [2]:
df = pd.read_csv('weather.csv')
df

Unnamed: 0,date,precipitation,temp_max,temp_min,wind,weather
0,2012-01-01,0.0,12.8,5.0,4.7,drizzle
1,2012-01-02,10.9,10.6,2.8,4.5,rain
2,2012-01-03,0.8,11.7,7.2,2.3,rain
3,2012-01-04,20.3,12.2,5.6,4.7,rain
4,2012-01-05,1.3,8.9,2.8,6.1,rain
...,...,...,...,...,...,...
1456,2015-12-27,8.6,4.4,1.7,2.9,rain
1457,2015-12-28,1.5,5.0,1.7,1.3,rain
1458,2015-12-29,0.0,7.2,0.6,2.6,fog
1459,2015-12-30,0.0,5.6,-1.0,3.4,sun


In [4]:
type = df.dtypes
type

date              object
precipitation    float64
temp_max         float64
temp_min         float64
wind             float64
weather           object
dtype: object

In [6]:
df.isna().sum()

date             0
precipitation    0
temp_max         0
temp_min         0
wind             0
weather          0
dtype: int64

In [7]:
col = df.iloc[:,3]
col

0       5.0
1       2.8
2       7.2
3       5.6
4       2.8
       ... 
1456    1.7
1457    1.7
1458    0.6
1459   -1.0
1460   -2.1
Name: temp_min, Length: 1461, dtype: float64

In [9]:
#sorting the temp_min column
sort_col= np.sort(col)
sort_col

array([-7.1, -6.6, -6. , ..., 18.3, 18.3, 18.3])

In [8]:
col2 =df.iloc[:,4]
col2

0       4.7
1       4.5
2       2.3
3       4.7
4       6.1
       ... 
1456    2.9
1457    1.3
1458    2.6
1459    3.4
1460    3.5
Name: wind, Length: 1461, dtype: float64

In [10]:
#sorting the wind column
sort_col2= np.sort(col2)
sort_col2

array([0.4, 0.5, 0.5, ..., 8.8, 8.8, 9.5])

In [18]:
data = {'temp_min': sort_col, 'wind': sort_col2}
df1 = pd.DataFrame(data)
df1

Unnamed: 0,temp_min,wind
0,-7.1,0.4
1,-6.6,0.5
2,-6.0,0.5
3,-5.5,0.5
4,-4.9,0.6
...,...,...
1456,18.3,8.1
1457,18.3,8.2
1458,18.3,8.8
1459,18.3,8.8


In [22]:
#bining the temp_min column
df1['temp_min_bin'] = pd.cut(df1['temp_min'], 3,labels=[0,1,2])
df1

Unnamed: 0,temp_min,wind,temp_min_bin
0,-7.1,0.4,0
1,-6.6,0.5,0
2,-6.0,0.5,0
3,-5.5,0.5,0
4,-4.9,0.6,0
...,...,...,...
1456,18.3,8.1,2
1457,18.3,8.2,2
1458,18.3,8.8,2
1459,18.3,8.8,2


In [23]:
#bining the wind column
df1['wind_bin'] = pd.cut(df1['wind'], 3,labels=[0,1,2])
df1

Unnamed: 0,temp_min,wind,temp_min_bin,wind_bin
0,-7.1,0.4,0,0
1,-6.6,0.5,0,0
2,-6.0,0.5,0,0
3,-5.5,0.5,0,0
4,-4.9,0.6,0,0
...,...,...,...,...
1456,18.3,8.1,2,2
1457,18.3,8.2,2,2
1458,18.3,8.8,2,2
1459,18.3,8.8,2,2


In [27]:
# Convert binned data to one-hot encoded format
df1_onehot = pd.get_dummies(df1[['temp_min_bin', 'wind_bin']], prefix=['temp_min', 'wind'])
df1

Unnamed: 0,temp_min,wind,temp_min_bin,wind_bin
0,-7.1,0.4,0,0
1,-6.6,0.5,0,0
2,-6.0,0.5,0,0
3,-5.5,0.5,0,0
4,-4.9,0.6,0,0
...,...,...,...,...
1456,18.3,8.1,2,2
1457,18.3,8.2,2,2
1458,18.3,8.8,2,2
1459,18.3,8.8,2,2


In [28]:
# Apply Apriori algorithm
frequent_itemsets = apriori(df1_onehot, min_support=0.2, use_colnames=True)

In [30]:

# Generate association rules
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.5)

In [31]:
print(frequent_itemsets)
print(rules)

    support              itemsets
0  0.484600          (temp_min_1)
1  0.417522          (temp_min_2)
2  0.637235              (wind_0)
3  0.333333              (wind_1)
4  0.484600  (temp_min_1, wind_0)
5  0.333333  (temp_min_2, wind_1)
    antecedents   consequents  antecedent support  consequent support  \
0  (temp_min_1)      (wind_0)            0.484600            0.637235   
1      (wind_0)  (temp_min_1)            0.637235            0.484600   
2  (temp_min_2)      (wind_1)            0.417522            0.333333   
3      (wind_1)  (temp_min_2)            0.333333            0.417522   

    support  confidence      lift  leverage  conviction  zhangs_metric  
0  0.484600    1.000000  1.569280  0.175796         inf       0.703851  
1  0.484600    0.760473  1.569280  0.175796    2.151739       1.000000  
2  0.333333    0.798361  2.395082  0.194159    3.306233       1.000000  
3  0.333333    1.000000  2.395082  0.194159         inf       0.873717  
