#### Apriori Algorithm where our number of transcation is the number of days present in the patients data 
##### Each Transaction will look like [day1,day5,day10,day16] where each value is the day where the compliant time is less than 45 mins for the particular hour like for hour zero the the compliant time was less than 45 mins for day1,day5,day10,day16

## OR 

#### Each Transaction will look like [monday,tuesday,sunday] where each value is the day where the compliant time is less than 40 mins for the particular hour like for hour zero the the compliant time was less than 40 mins for monday,tuesday,sunday


##### Indexing based on the date 

In [1]:
#Libraries required 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle
from mpl_toolkits import mplot3d
import seaborn as sns
import statsmodels.api as sm
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

In [2]:
# Getting the data :
df1=pd.read_csv('cpmp_13038.csv')
df2=pd.read_csv('13058_hourly_compliance.csv')
df3=pd.read_csv('cpmp_13536.csv')

df1=df1.sort_values(by=['local_date','hour'],inplace=False)
df2=df2.sort_values(by=['local_date','hour'],inplace=False)
df3=df3.sort_values(by=['local_date','hour'],inplace=False)

df1=df1[['local_date','hour','compliant_min']]
df2=df2[['local_date','hour','compliant_min']]
df3=df3[['local_date','hour','compliant_min']]

df1 = df1.reset_index(drop=True)
df2 = df2.reset_index(drop=True)
df3 = df3.reset_index(drop=True)

# Convert 'local_date' column to datetime type
df1['local_date'] = pd.to_datetime(df1['local_date'])
df2['local_date'] = pd.to_datetime(df2['local_date'])

df3['local_date'] = pd.to_datetime(df3['local_date'])



In [3]:
df1

Unnamed: 0,local_date,hour,compliant_min
0,2022-02-25,0,0
1,2022-02-25,1,0
2,2022-02-25,2,0
3,2022-02-25,3,0
4,2022-02-25,4,0
...,...,...,...
1286,2022-04-29,14,0
1287,2022-04-29,15,0
1288,2022-04-29,16,0
1289,2022-04-29,17,0


In [4]:

df1['local_date'] = pd.to_datetime(df1['local_date'])


start_date = df1['local_date'].min()
end_date = df1['local_date'].max()


date_range = pd.date_range(start=start_date, end=end_date, freq='D')


all_dates_df = pd.DataFrame({'local_date': date_range})


all_hours = range(24)


all_hours_df = pd.DataFrame({'hour': all_hours})


all_dates_hours_df = all_dates_df.assign(key=1).merge(all_hours_df.assign(key=1), on='key').drop('key', axis=1)


merged_df = all_dates_hours_df.merge(df1, on=['local_date', 'hour'], how='left')


merged_df['compliant_min'] = merged_df['compliant_min'].fillna(0)


merged_df = merged_df.sort_values(['local_date', 'hour']).reset_index(drop=True)


print(merged_df)

     local_date  hour  compliant_min
0    2022-02-25     0            0.0
1    2022-02-25     1            0.0
2    2022-02-25     2            0.0
3    2022-02-25     3            0.0
4    2022-02-25     4            0.0
...         ...   ...            ...
1531 2022-04-29    19            0.0
1532 2022-04-29    20            0.0
1533 2022-04-29    21            0.0
1534 2022-04-29    22            0.0
1535 2022-04-29    23            0.0

[1536 rows x 3 columns]


In [5]:
df1=merged_df.copy()

In [6]:
# Create a dictionary to map unique days to day names
unique_days1 = df1['local_date'].dt.date.unique()
day_names1 = {day: f'day{i+1}' for i, day in enumerate(unique_days1)}

# Add a new column with day names
df1['day'] = df1['local_date'].dt.date.map(day_names1)

In [7]:
df1 = df1.reset_index(drop=True)

In [8]:
df1=df1.sort_values(by=['local_date','hour'],inplace=False)

In [9]:
df1

Unnamed: 0,local_date,hour,compliant_min,day
0,2022-02-25,0,0.0,day1
1,2022-02-25,1,0.0,day1
2,2022-02-25,2,0.0,day1
3,2022-02-25,3,0.0,day1
4,2022-02-25,4,0.0,day1
...,...,...,...,...
1531,2022-04-29,19,0.0,day64
1532,2022-04-29,20,0.0,day64
1533,2022-04-29,21,0.0,day64
1534,2022-04-29,22,0.0,day64


In [10]:
import io

def generate_heatmap1(df):
    # Create a copy of the dataframe
    patient_heat_map = df.copy()

    # Convert 'compliant_min' to integer
    patient_heat_map['compliant_min'] = patient_heat_map['compliant_min'].astype(int)

    # Get unique days in the DataFrame
    unique_days = patient_heat_map['day'].unique()
    unique_days = sorted(unique_days, key=lambda x: int(x[3:]))  # Sort the days numerically

    # Pivot the data
    pivot_data = patient_heat_map.pivot('hour', 'day', 'compliant_min')

    # Reorder the columns based on the sorted unique days
    pivot_data = pivot_data[unique_days]

    # Increase the figure size for better visibility
    plt.figure(figsize=(18, 6))

    # Creating the heatmap
    sns.heatmap(pivot_data, cmap='RdYlGn', vmin=0, vmax=60, cbar=True, linewidths=0.5, linecolor='gray')

    # Add lines to separate hours and days
    plt.hlines(y=range(0, len(pivot_data.index)), xmin=0, xmax=len(pivot_data.columns), color='gray', linewidths=0.5)
    plt.vlines(x=range(0, len(pivot_data.columns)), ymin=0, ymax=len(pivot_data.index), color='gray', linewidths=0.5)

    # Customize the plot
    plt.title('Compliant Minutes Heatmap', fontsize=16)
    plt.xlabel('Day', fontsize=14)
    plt.ylabel('Hour', fontsize=14)

    # Increase the font size of the x-axis and y-axis labels
    plt.xticks(fontsize=12)
    plt.yticks(fontsize=12)

    # Save the plot to a BytesIO object
    plot_bytes = io.BytesIO()
    plt.savefig(plot_bytes, format='png', dpi=400)  # Increase the dpi for higher resolution
    plt.show()
    plt.close()

    # Reset the buffer position to the start
    plot_bytes.seek(0)

    # Return the BytesIO object
    return plot_bytes


In [None]:
df1= df1[df1['compliant_min'] < 30]

In [None]:
df1['day_of_week'] = df1['local_date'].dt.day_name()

In [None]:
df1

In [12]:
# Group the DataFrame by 'day'
grouped_by_day = df1.groupby('day')

# Filter and keep only the days with more than 5 rows where 'compliant_min' is less than 30
filtered_days = grouped_by_day.filter(lambda x: (x['compliant_min'] < 30).sum() > 6)


In [13]:
filtered_days

Unnamed: 0,local_date,hour,compliant_min,day
0,2022-02-25,0,0.0,day1
1,2022-02-25,1,0.0,day1
2,2022-02-25,2,0.0,day1
3,2022-02-25,3,0.0,day1
4,2022-02-25,4,0.0,day1
...,...,...,...,...
1531,2022-04-29,19,0.0,day64
1532,2022-04-29,20,0.0,day64
1533,2022-04-29,21,0.0,day64
1534,2022-04-29,22,0.0,day64


In [14]:
# Filter the DataFrame for each hour
filtered_data = []
for hour in range(24):
    hour_data = filtered_days[filtered_days['hour'] == hour]['day'].tolist()
    filtered_data.append(tuple(hour_data))

# Store the filtered data as a tuple
filtered_data_tuple = tuple(filtered_data)

# Print the result
for hour, data in enumerate(filtered_data_tuple):
    print(f"For hour {hour}: {data}")

For hour 0: ('day1', 'day6', 'day7', 'day8', 'day9', 'day10', 'day11', 'day12', 'day13', 'day31', 'day32', 'day40', 'day41', 'day42', 'day43', 'day44', 'day45', 'day46', 'day48', 'day50', 'day53', 'day56', 'day57', 'day60', 'day62', 'day64')
For hour 1: ('day1', 'day6', 'day7', 'day8', 'day9', 'day10', 'day11', 'day12', 'day13', 'day31', 'day32', 'day40', 'day41', 'day42', 'day43', 'day44', 'day45', 'day46', 'day48', 'day50', 'day53', 'day56', 'day57', 'day60', 'day62', 'day64')
For hour 2: ('day1', 'day6', 'day7', 'day8', 'day9', 'day10', 'day11', 'day12', 'day13', 'day31', 'day32', 'day40', 'day41', 'day42', 'day43', 'day44', 'day45', 'day46', 'day48', 'day50', 'day53', 'day56', 'day57', 'day60', 'day62', 'day64')
For hour 3: ('day1', 'day6', 'day7', 'day8', 'day9', 'day10', 'day11', 'day12', 'day13', 'day31', 'day32', 'day40', 'day41', 'day42', 'day43', 'day44', 'day45', 'day46', 'day48', 'day50', 'day53', 'day56', 'day57', 'day60', 'day62', 'day64')
For hour 4: ('day1', 'day6', 'da

In [15]:
from collections import Counter

# Count the occurrences of each day across all hours
day_counts = Counter([day for hour_data in filtered_data_tuple for day in hour_data])

# Get the days that occurred at least 8 times
days_to_keep = set(day for day, count in day_counts.items() if count >= 5)

# Filter the tuples by removing the days that occurred fewer than 8 times
filtered_data_tuple_filtered = tuple([tuple(day for day in hour_data if day in days_to_keep) for hour_data in filtered_data_tuple])

# Print the result
for hour, data in enumerate(filtered_data_tuple_filtered):
    print(f"For hour {hour}: {data}")

For hour 0: ('day1', 'day6', 'day7', 'day8', 'day9', 'day10', 'day11', 'day12', 'day13', 'day31', 'day32', 'day40', 'day41', 'day42', 'day43', 'day44', 'day45', 'day46', 'day48', 'day50', 'day53', 'day56', 'day57', 'day60', 'day62', 'day64')
For hour 1: ('day1', 'day6', 'day7', 'day8', 'day9', 'day10', 'day11', 'day12', 'day13', 'day31', 'day32', 'day40', 'day41', 'day42', 'day43', 'day44', 'day45', 'day46', 'day48', 'day50', 'day53', 'day56', 'day57', 'day60', 'day62', 'day64')
For hour 2: ('day1', 'day6', 'day7', 'day8', 'day9', 'day10', 'day11', 'day12', 'day13', 'day31', 'day32', 'day40', 'day41', 'day42', 'day43', 'day44', 'day45', 'day46', 'day48', 'day50', 'day53', 'day56', 'day57', 'day60', 'day62', 'day64')
For hour 3: ('day1', 'day6', 'day7', 'day8', 'day9', 'day10', 'day11', 'day12', 'day13', 'day31', 'day32', 'day40', 'day41', 'day42', 'day43', 'day44', 'day45', 'day46', 'day48', 'day50', 'day53', 'day56', 'day57', 'day60', 'day62', 'day64')
For hour 4: ('day1', 'day6', 'da

In [16]:
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

In [17]:
# Convert the filtered_data_tuple_filtered into a list of transactions
transactions = list(filtered_data_tuple_filtered)

In [18]:
# Initialize TransactionEncoder
te = TransactionEncoder()
# Apply TransactionEncoder to the list of transactions
te_ary = te.fit(transactions).transform(transactions)
# Convert the encoded array into a DataFrame
df_encoded = pd.DataFrame(te_ary, columns=te.columns_)


In [19]:
# Apply Apriori algorithm to find frequent itemsets
frequent_itemsets = apriori(df_encoded, min_support=0.75, use_colnames=True)


KeyboardInterrupt: 

In [None]:
# Sort the frequent itemsets by support in descending order
frequent_itemsets = frequent_itemsets.sort_values(by='support', ascending=False)

In [None]:
# Convert the itemsets column to frozensets
frequent_itemsets['itemsets'] = frequent_itemsets['itemsets'].apply(frozenset)

# Drop duplicate rows based on the frozensets in the itemsets column
frequent_itemsets = frequent_itemsets.drop_duplicates(subset='itemsets')

frequent_itemsets['itemsets'] = frequent_itemsets['itemsets'].apply(set)

print(frequent_itemsets)

In [None]:
# Filter the DataFrame to get itemsets with support of 0.5
filtered_itemsets = frequent_itemsets[frequent_itemsets['support'] > 0.7]

filtered_itemsets = filtered_itemsets.reset_index(drop=True)

In [None]:
filtered_itemsets.itemsets[3]

In [None]:
# Filter out itemsets with a single value
frequent_itemsets_filtered = frequent_itemsets[frequent_itemsets['itemsets'].apply(lambda x: len(x) ==1)]

# Print the filtered frequent itemsets
print(frequent_itemsets_filtered.head(15))

## days for which the patient is least compliant

In [None]:
# Filter the DataFrame for each hour
filtered_data = []
for hour in range(24):
    hour_data = filtered_days[filtered_days['hour'] == hour]['day_of_week'].tolist()
    filtered_data.append(tuple(hour_data))

# Store the filtered data as a tuple
filtered_data_tuple = tuple(filtered_data)

# Print the result
for hour, data in enumerate(filtered_data_tuple):
    print(f"For hour {hour}: {data}")

In [None]:
from collections import Counter

# Count the occurrences of each day across all hours
day_counts = Counter([day for hour_data in filtered_data_tuple for day in hour_data])

# Get the days that occurred at least 8 times
days_to_keep = set(day for day, count in day_counts.items() if count >= 0)

# Filter the tuples by removing the days that occurred fewer than 8 times
filtered_data_tuple_filtered = tuple([tuple(day for day in hour_data if day in days_to_keep) for hour_data in filtered_data_tuple])

# Print the result
for hour, data in enumerate(filtered_data_tuple_filtered):
    print(f"For hour {hour}: {data}")

In [None]:
# Convert the filtered_data_tuple_filtered into a list of transactions
transactions = list(filtered_data_tuple_filtered)
# Initialize TransactionEncoder
te = TransactionEncoder()
# Apply TransactionEncoder to the list of transactions
te_ary = te.fit(transactions).transform(transactions)
# Convert the encoded array into a DataFrame
df_encoded = pd.DataFrame(te_ary, columns=te.columns_)

# Apply Apriori algorithm to find frequent itemsets
frequent_itemsets = apriori(df_encoded, min_support=0.6, use_colnames=True)

# Sort the frequent itemsets by support in descending order
frequent_itemsets = frequent_itemsets.sort_values(by='support', ascending=False)

In [None]:
frequent_itemsets

In [None]:
# Create new dataframe with unique support values
unique_df = frequent_itemsets.drop_duplicates(subset='itemsets')

# Print the new dataframe
print(unique_df)

### Patient 3

In [None]:

df3['local_date'] = pd.to_datetime(df3['local_date'])


start_date = df3['local_date'].min()
end_date = df3['local_date'].max()


date_range = pd.date_range(start=start_date, end=end_date, freq='D')


all_dates_df = pd.DataFrame({'local_date': date_range})


all_hours = range(24)


all_hours_df = pd.DataFrame({'hour': all_hours})


all_dates_hours_df = all_dates_df.assign(key=1).merge(all_hours_df.assign(key=1), on='key').drop('key', axis=1)


merged_df = all_dates_hours_df.merge(df1, on=['local_date', 'hour'], how='left')


merged_df['compliant_min'] = merged_df['compliant_min'].fillna(0)


merged_df = merged_df.sort_values(['local_date', 'hour']).reset_index(drop=True)


print(merged_df)

In [None]:
df3=merged_df.copy()

In [None]:
# Create a dictionary to map unique days to day names
unique_days1 = df3['local_date'].dt.date.unique()
day_names1 = {day: f'day{i+1}' for i, day in enumerate(unique_days1)}

# Add a new column with day names
df3['day'] = df3['local_date'].dt.date.map(day_names1)

In [None]:
df3=df3.sort_values(by=['local_date','hour'],inplace=False)

In [None]:
generate_heatmap1(df3)

In [None]:
df3= df3[df3['compliant_min'] < 30]

In [None]:
# Group the DataFrame by 'day'
grouped_by_day = df3.groupby('day')

# Filter and keep only the days with more than 5 rows where 'compliant_min' is less than 30
filtered_days = grouped_by_day.filter(lambda x: (x['compliant_min'] < 30).sum() > 6)


In [None]:
# Filter the DataFrame for each hour
filtered_data = []
for hour in range(24):
    hour_data = filtered_days[filtered_days['hour'] == hour]['day'].tolist()
    filtered_data.append(tuple(hour_data))

# Store the filtered data as a tuple
filtered_data_tuple = tuple(filtered_data)

# Print the result
for hour, data in enumerate(filtered_data_tuple):
    print(f"For hour {hour}: {data}")

In [None]:
from collections import Counter

# Count the occurrences of each day across all hours
day_counts = Counter([day for hour_data in filtered_data_tuple for day in hour_data])

# Get the days that occurred at least 8 times
days_to_keep = set(day for day, count in day_counts.items() if count >= 5)

# Filter the tuples by removing the days that occurred fewer than 8 times
filtered_data_tuple_filtered = tuple([tuple(day for day in hour_data if day in days_to_keep) for hour_data in filtered_data_tuple])

# Print the result
for hour, data in enumerate(filtered_data_tuple_filtered):
    print(f"For hour {hour}: {data}")

In [None]:
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

In [None]:
# Convert the filtered_data_tuple_filtered into a list of transactions
transactions = list(filtered_data_tuple_filtered)

In [None]:
# Initialize TransactionEncoder
te = TransactionEncoder()
# Apply TransactionEncoder to the list of transactions
te_ary = te.fit(transactions).transform(transactions)
# Convert the encoded array into a DataFrame
df_encoded = pd.DataFrame(te_ary, columns=te.columns_)


In [None]:
# Apply Apriori algorithm to find frequent itemsets
frequent_itemsets = apriori(df_encoded, min_support=0.5, use_colnames=True)


In [None]:
# Sort the frequent itemsets by support in descending order
frequent_itemsets = frequent_itemsets.sort_values(by='support', ascending=False)

In [None]:
print(frequent_itemsets)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Sample data (replace this with your actual data)
data = {
    'support': [0.875, 0.666667, 0.583333, 0.541667, 0.541667, 0.541667],
    'itemsets': [['day2'], ['day28'], ['day4'], ['day14'], ['day14', 'day2'], ['day28', 'day2']]
}

# Create a pandas DataFrame from the data
df = pd.DataFrame(data)

# Convert itemsets to strings
df['itemsets'] = df['itemsets'].apply(lambda x: ', '.join(x))

# Sort the DataFrame by support values in descending order
df = df.sort_values(by='support', ascending=False)

# Create a bar plot
plt.figure(figsize=(10, 6))
plt.bar(df['itemsets'], df['support'], color='skyblue')
plt.xlabel('Itemsets')
plt.ylabel('Support')
plt.title('Support Values of Frequent Itemsets')
plt.xticks(rotation=45)
plt.ylim(0, 1)  # Set the y-axis limit to show support values from 0 to 1
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.tight_layout()

# Show the plot
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Sample data (replace this with your actual data)
data = {
    'Support': [1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000],
    'itemsets': ['{day10}', '{day11}', '{day44}', '{day43}', '{day42}', '{day6}', '{day45}', '{day41}', '{day12}', '{day40}']
}

# Create a pandas DataFrame from the data
df = pd.DataFrame(data)

# Convert itemsets to strings
df['itemsets'] = df['itemsets'].str.strip('{}')

# Sort the DataFrame by support values in descending order
df = df.sort_values(by='Support', ascending=False)

# Create a bar plot
plt.figure(figsize=(10, 6))
plt.bar(df['itemsets'], df['Support'], color='skyblue')
plt.xlabel('Itemsets')
plt.ylabel('Support')
plt.title('Support Values of Frequent Itemsets')
plt.xticks(rotation=45)
plt.ylim(0, 1)  # Set the y-axis limit to show support values from 0 to 1
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.tight_layout()

# Show the plot
plt.show()
