In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import make_interp_spline

# Year window used to filter the counts and to set the x-axis ticks/limits.
START_YEAR = 1990
END_YEAR = 2024


def yearly_counts(years, start=START_YEAR, end=END_YEAR):
    """Count how many entries fall in each year of the window [start, end].

    Args:
        years: Iterable / Series of year values. Entries that cannot be
            parsed as numbers (including NaN) are dropped.
        start: First year (inclusive) to keep.
        end: Last year (inclusive) to keep.

    Returns:
        A pandas Series indexed by consecutive integer years, running from
        the earliest to the latest year observed inside the window. Years in
        between that have no entries are present with a count of 0. The
        Series is empty when no entry falls inside the window.
    """
    years = pd.to_numeric(pd.Series(years), errors='coerce').dropna().astype(int)
    years = years[(years >= start) & (years <= end)]
    if years.empty:
        return pd.Series(dtype=int)

    counts = years.value_counts().sort_index()
    # value_counts() omits years with no rows; without zero-filling, the
    # spline would interpolate straight across such gaps instead of
    # passing through 0.
    every_year = range(int(counts.index.min()), int(counts.index.max()) + 1)
    return counts.reindex(every_year, fill_value=0)


def smooth_counts(years, values, num_points=300, k=3):
    """Interpolate per-year counts onto a dense grid for a smooth curve.

    Args:
        years: Strictly increasing year values (the spline's x data).
        values: Count for each entry of ``years``.
        num_points: Number of evenly spaced sample points on the dense grid.
        k: Requested spline degree; lowered automatically when there are
            fewer than ``k + 1`` data points.

    Returns:
        A ``(grid, smoothed)`` tuple of float arrays. With fewer than two
        data points the input is returned unchanged (as float arrays),
        because there is nothing to interpolate.
    """
    years = np.asarray(years, dtype=float)
    values = np.asarray(values, dtype=float)
    if years.size < 2:
        return years, values

    # A degree-k interpolating spline needs at least k + 1 points.
    degree = min(k, years.size - 1)
    grid = np.linspace(years.min(), years.max(), num_points)
    spline = make_interp_spline(years, values, k=degree)
    # The spline can undershoot below zero next to steep rises; a negative
    # number of publications is meaningless, so clamp at 0.
    return grid, np.clip(spline(grid), 0, None)


if __name__ == "__main__":
    import os  # only needed here, to create the output directory

    # Load the data from the provided spreadsheets
    file1 = '../scopus/image_vs_time/scopus_image.csv'
    file2 = '../scopus/image_vs_time/scopus_time_series.csv'

    df1 = pd.read_csv(file1)
    df2 = pd.read_csv(file2)

    # Per-year counts restricted to the START_YEAR..END_YEAR window
    counts1 = yearly_counts(df1['Year'])
    counts2 = yearly_counts(df2['Year'])

    # Dense, spline-smoothed versions of both series
    years_smooth1, values_smooth1 = smooth_counts(counts1.index.values, counts1.values)
    years_smooth2, values_smooth2 = smooth_counts(counts2.index.values, counts2.values)

    # Plotting the smooth curves
    plt.figure(figsize=(12, 6))

    # Plot the smooth lines
    plt.plot(years_smooth1, values_smooth1, label='image AND ( classification OR segmentation )', linewidth=2)
    plt.plot(years_smooth2, values_smooth2, label='"time-series" AND ( forecasting OR prediction )', linewidth=2)

    # Fill area under the curves
    plt.fill_between(years_smooth1, values_smooth1, alpha=0.3)
    plt.fill_between(years_smooth2, values_smooth2, alpha=0.3)

    # Setting labels, title, and legend
    plt.xlabel('Year', fontsize=14)
    plt.ylabel('Number of Publications', fontsize=14)
    plt.title(
        'Scopus Search for Publications with "deep learning" OR\n'
        f'"neural network" by Year ({START_YEAR} - {END_YEAR})',
        fontsize=18,
    )
    plt.xticks(range(START_YEAR, END_YEAR + 1), rotation=90)
    plt.tick_params(labelsize=12)
    plt.legend(fontsize=14)
    plt.grid(axis='y')

    # Remove white space on left and right
    plt.xlim(START_YEAR, END_YEAR)
    plt.margins(x=0)

    # Save the plot; create the target directory first so savefig does not
    # fail on a fresh checkout where it does not exist yet.
    output_path = "plots/scopus/image_vs_time.pdf"
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    plt.savefig(output_path, dpi=600, bbox_inches='tight')  # Save the plot with proper bounding
