In [None]:
import pandas as pd     # pandas is a dataframe library
import matplotlib.pyplot as plt     # matplotlib.pyplot plots data
import seaborn as sns    # seaborn plots data
from sklearn.linear_model import LinearRegression   # for linear regression
from sklearn.metrics import mean_squared_error    # for MSE
from sklearn.preprocessing import PolynomialFeatures    # for polynomial regression
from sklearn.pipeline import make_pipeline


In [None]:
# Load and read data
data = pd.read_csv('countries_gdp.csv')

# Exploratory data analysis

# Keep only the row(s) for Indonesia.
data = data[data['Country Name'] == 'Indonesia']

# Reshape wide -> long: every column that is not an id column becomes one
# (Year, GDP) row. 'Year' holds the original column label, so its values are
# whatever labels read_csv produced for those columns.
id_columns = ['Country Name', 'Country Code', 'Indicator Name', 'Indicator Code']
data = data.melt(id_vars=id_columns, var_name='Year', value_name='GDP')

# The id columns are constant after filtering to one country; drop them so
# only Year and GDP remain, then discard years with no GDP value.
data = data.drop(id_columns, axis=1)
data = data.dropna(axis=0, how='any')

# Preview both ends of the series in a single table. Writing
# `data.head(); data.tail()` would display only the tail, because a notebook
# cell renders just its final expression.
pd.concat([data.head(), data.tail()])

In [None]:
# Bar chart of GDP per year.
# sns.catplot is a figure-level function: it creates its own figure, sized by
# `height` and `aspect`. A preceding plt.figure(...) call is therefore not
# used by the plot and only leaves an extra empty figure in the cell output,
# so none is created here.
sns.catplot(x='Year', y='GDP', data=data, kind='bar', height=5, aspect=2)

# The catplot figure is the current figure at this point, so the pyplot
# helpers below act on its axes.
plt.title('Indonesia GDP per Year')
plt.xticks(rotation=90)  # year labels are dense; rotate so they do not overlap
plt.show()

In [None]:
# Features (Year) as an (n_samples, 1) float matrix. After melt() the Year
# column holds the original column labels, so convert explicitly instead of
# relying on the estimator to coerce an object array of strings.
X = data[['Year']].astype(float).values
y = data['GDP'].values     # Target variable (GDP)

# Degree of the polynomial fitted to GDP as a function of year.
degree = 3

# PolynomialFeatures expands Year into [1, Year, Year^2, Year^3]; the linear
# regression is then fitted on those expanded columns.
# NOTE(review): raw calendar years cubed are ~8e9, so the expanded columns
# differ by many orders of magnitude; consider scaling/centering Year inside
# the pipeline if the fit looks numerically unstable.
model = make_pipeline(PolynomialFeatures(degree), LinearRegression())
model.fit(X, y)

# predict() expects a 2-D input and returns a 1-element array for one sample.
prediction = model.predict([[2023]])

# Print the scalar value rather than the array wrapper.
print('Prediction of Indonesia GDP in 2023: ', prediction[0])

# In-sample error: computed on the same rows the model was fitted on, so it
# measures goodness of fit, not forecast accuracy.
mse = mean_squared_error(y, model.predict(X))
print('MSE: ', mse)


In [None]:
# Overlay the fitted polynomial curve (red) on the observed GDP values (black).
fig = plt.figure(figsize=(10, 5))
sns.set_style('ticks')

# Grab the axes only after the style is set, so they are created with it.
ax = plt.gca()

# Fitted values for the same years the model was trained on.
fitted_gdp = model.predict(X)

sns.scatterplot(data=data, x='Year', y='GDP', color='black', ax=ax)
sns.lineplot(data=data, x='Year', y=fitted_gdp, color='red', ax=ax)

ax.set_title('Indonesia GDP per Year')
plt.xticks(rotation=90)