In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from sklearn.metrics import r2_score

In [None]:
# Read the China GDP CSV into a DataFrame, then print it so the loaded rows
# can be checked in the cell output.

df = pd.read_csv(filepath_or_buffer="datasets/china_gdp.csv")
print(df)

In [None]:
# Pull the two columns used for fitting out of the DataFrame:
# "Year" is the independent variable, "Value" the dependent one.
x_data, y_data = df["Year"], df["Value"]


# Logistic (sigmoid) curve used as the model for curve fitting
def logistic(x, a, b, c):
    """Evaluate the three-parameter logistic curve a / (1 + exp(-b * (x - c))).

    Args:
        x: Point(s) at which to evaluate the curve; a scalar or anything
            NumPy can broadcast over.
        a: Numerator of the curve, i.e. the value approached as
            b * (x - c) grows large.
        b: Growth rate; scales how quickly the curve rises around ``c``.
        c: Midpoint; at ``x == c`` the result is ``a / 2``.

    Returns:
        The curve value(s), with the same shape as ``x``.
    """
    exponent = -b * (x - c)
    denominator = 1 + np.exp(exponent)
    return a / denominator


# Initial parameter guess [max value, growth rate, inflection point].
# Series.max() is the vectorized, NaN-aware way to get the largest observation.
p0 = [y_data.max(), 0.03, 1990]

# Fit the logistic model. curve_fit returns (optimal parameters, covariance);
# the covariance is bound to a real name rather than `_`, because assigning to
# `_` in a notebook overrides IPython's "last output" variable.
params, pcov = curve_fit(logistic, x_data, y_data, p0=p0)
a, b, c = params
print(f"Fitted parameters: a={a:.2f}, b={b:.4f}, c={c:.2f}")

In [None]:
# Predict & Evaluate

# Evaluate the fitted curve at the observed x values, then score the
# predictions against the observed y values with r2_score.
y_pred = logistic(x_data, a, b, c)
r2 = r2_score(y_data, y_pred)
print(f"R² Score: {r2:.4f}")

In [None]:
# Plot the observed values (red points) and the fitted logistic curve
# (blue line) against the year.

fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(x_data, y_data, color="red", label="Actual Data")
ax.plot(x_data, y_pred, color="blue", linewidth=2, label="Predicted Logistic Model")
ax.set_title("China GDP (1950-2014): Actual vs Predicted")
ax.set_xlabel("Year")
ax.set_ylabel("GDP in Billions")
ax.legend()
ax.grid(linestyle="--", alpha=0.7)
plt.show()

In [None]:
# Plot predicted values against actual values. Each point is one
# (actual, predicted) pair; points lying on the dashed diagonal are
# predicted exactly.

fig, ax = plt.subplots(figsize=(10, 6))
# The points are prediction/observation pairs, so label them as such
# instead of as raw "actual" data.
ax.scatter(y_data, y_pred, color="red", label="Predicted vs Actual")
# Identity line spanning the observed range, labelled so it shows up in the legend.
actual_range = [y_data.min(), y_data.max()]
ax.plot(actual_range, actual_range, "b--", label="Perfect Prediction (y = x)")
ax.set_title("Actual vs Predicted GDP Values")
ax.set_xlabel("Actual GDP")
ax.set_ylabel("Predicted GDP")
ax.legend()
ax.grid(linestyle="--", alpha=0.7)
plt.show()