In [None]:
# Reload with header row index 2
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Close every open matplotlib figure before this cell starts plotting.
plt.close('all')

# Column names are taken from row index 2 of the sheet (header=2).
df = pd.read_excel('Screening_xxx.xlsx', sheet_name='XXX', header=2)
print('Columns preview:', df.columns[:25], sep='\n')

# Coerce Year to numeric; values that cannot be parsed become NaN and are
# therefore rejected by the range test below.
df['Year'] = pd.to_numeric(df['Year'], errors='coerce')

# Keep only the rows whose Year lies in 2019-2024, endpoints included.
in_window = df['Year'].between(2019, 2024, inclusive='both')
df_recent = df[in_window]
print('Record count 2019-2024:', len(df_recent), sep='\n')

# Bar chart: number of papers per publication year.
# Reindex over the whole 2019-2024 window so that a year without any paper is
# drawn as an explicit zero bar instead of silently disappearing from the
# x-axis (value_counts only reports years that actually occur).
pub_counts = (
    df_recent['Year']
    .dropna()
    .astype(int)
    .value_counts()
    .reindex(range(2019, 2025), fill_value=0)
)

fig, ax = plt.subplots(figsize=(8, 4))
# Draw the bars with matplotlib and colour them from the seaborn palette:
# passing `palette` to sns.barplot without a `hue` variable is deprecated in
# seaborn 0.13 and emits a FutureWarning.
ax.bar(
    pub_counts.index.astype(str),
    pub_counts.values,
    color=sns.color_palette('viridis', n_colors=len(pub_counts)),
)
ax.set_xlabel('Publication Year')
ax.set_ylabel('Number of Papers')
ax.set_title('Publication Trends (2019-2024)')
# Categorical bars sit at x = 0, 1, 2, ...; anchor each label to the top of its
# bar so the gap does not depend on the y-axis scale.
for i, v in enumerate(pub_counts.values):
    ax.text(i, v, str(v), ha='center', va='bottom')
fig.tight_layout()
plt.show()

# Heatmap Platform vs Algorithm
# Each cell of the two columns is treated as a comma-separated list; every
# (platform, algorithm) combination listed on the same row is counted once.
platform_col = 'Platforms Used'
alg_col = 'Algorithms Used'
if platform_col in df_recent.columns and alg_col in df_recent.columns:
    # Drop rows missing either field BEFORE casting to str: astype(str) would
    # turn NaN into the literal text 'nan', which would then show up as a
    # bogus category in the heatmap.
    df_exp = df_recent.dropna(subset=[platform_col, alg_col]).copy()
    for col in (platform_col, alg_col):
        df_exp[col] = df_exp[col].astype(str).str.split(',')
    df_exp = df_exp.explode(platform_col).explode(alg_col)
    for col in (platform_col, alg_col):
        df_exp[col] = df_exp[col].str.strip()
    # Discard empty tokens produced by stray or trailing commas.
    df_exp = df_exp[(df_exp[platform_col] != '') & (df_exp[alg_col] != '')]

    matrix = pd.crosstab(df_exp[platform_col], df_exp[alg_col])
    if matrix.empty:
        # sns.heatmap raises on a zero-size matrix, so report instead.
        print('No platform/algorithm pairs available for heatmap.')
    else:
        plt.figure(figsize=(10,6))
        sns.heatmap(matrix, annot=True, fmt='d', cmap='YlGnBu')
        plt.title('Platform vs Algorithm Usage (2019-2024)')
        plt.xlabel('Algorithms')
        plt.ylabel('Platforms')
        plt.tight_layout()
        plt.show()
else:
    print('Columns missing for heatmap.')



In [None]:
# Heatmap: Platforms vs Methodology
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# ensure df_recent exists
# If an earlier cell has not defined df_recent, rebuild it from the workbook.
try:
    df_recent
except NameError:
    # Build the filtered frame in a single expression so that df_recent is only
    # ever bound to the finished 2019-2024 subset. With step-by-step
    # reassignment, a failure part-way through (e.g. a missing 'Year' column)
    # would leave df_recent bound to the raw, unfiltered sheet, and a re-run
    # of this cell would then skip the guard and analyse unfiltered data.
    df_recent = (
        pd.read_excel('Screening_xxx.xlsx', sheet_name='XXX', header=2)
        .assign(Year=lambda frame: pd.to_numeric(frame['Year'], errors='coerce'))
        .loc[lambda frame: frame['Year'].between(2019, 2024, inclusive='both')]
    )

# Each cell of the two columns is treated as a comma-separated list; every
# (platform, methodology) combination listed on the same row is counted once.
platform_col = 'Platforms Used'
method_col = 'Methodology'

if platform_col in df_recent.columns and method_col in df_recent.columns:
    # Drop rows missing either field BEFORE casting to str: astype(str) would
    # turn NaN into the literal text 'nan', which would then be counted as a
    # real platform / methodology.
    df_pm = df_recent[[platform_col, method_col]].dropna().copy()
    for col in [platform_col, method_col]:
        df_pm[col] = df_pm[col].astype(str).str.split(',')

    df_pm = df_pm.explode(platform_col).explode(method_col)
    for col in [platform_col, method_col]:
        df_pm[col] = df_pm[col].str.strip()

    # drop empty tokens produced by stray or trailing commas
    df_pm = df_pm[(df_pm[platform_col] != '') & (df_pm[method_col] != '')]

    matrix_pm = pd.crosstab(df_pm[platform_col], df_pm[method_col])
    print('Platforms-Methodology matrix size:')
    print(matrix_pm.shape)

    if matrix_pm.empty:
        # sns.heatmap raises on a zero-size matrix, so report instead.
        print('No platform/methodology pairs available for heatmap.')
    else:
        plt.figure(figsize=(12,7))
        ax = sns.heatmap(matrix_pm, annot=True, fmt='d', cmap='YlOrRd')
        plt.xlabel('Methodology')
        plt.ylabel('Platform')
        plt.title('Platforms vs Methods Used (2019-2024)')
        plt.tight_layout()
        plt.show()
else:
    # Same handling as the platform/algorithm heatmap when a column is absent.
    print('Columns missing for heatmap.')