In [None]:
import os
# Print the full path of every file found under the Kaggle input directory.
for folder, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(folder, name))

In [None]:
import numpy as np
import pandas as pd 
import seaborn as sns
from matplotlib import pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.svm import SVR
%matplotlib inline

In [None]:
# Read the world-population CSV from the Kaggle dataset folder.
df = pd.read_csv(
    filepath_or_buffer='../input/world-population-by-year/WorldPopulation.csv'
)
# Cell output: (number of rows, number of columns).
df.shape

In [None]:
# Count the missing values in each column.
df.isnull().sum()

In [None]:
# Display the column labels of the loaded table.
df.columns

In [None]:
# Preview the first five rows of the table.
df.head(n=5)

In [None]:
# Summary statistics for every column except the first one.
df.iloc[:, 1:].describe()

# **Population Growth**

In [None]:
# Line chart of total population by year.
plt.style.use('fivethirtyeight')
sns.relplot(
    x='Year',
    y='Population',
    data=df[['Year', 'Population']],
    kind='line',
    color='b',
    linewidth=1,
    height=10,
)
plt.title('Population Growth')
plt.show()

# **Net Change per Year**

In [None]:
# Yearly net change: a line drawn by seaborn with bars overlaid on the same axes.
plt.style.use('fivethirtyeight')
sns.relplot(
    x='Year',
    y='NetChange',
    data=df[['Year', 'NetChange']],
    kind='line',
    color='g',
    linewidth=2,
    height=10,
)
# plt.bar draws on the current axes, i.e. the ones relplot just created.
plt.bar(df['Year'], df['NetChange'])
plt.title('Net Change Per year')
plt.show()

# **World Population vs Urban Population**

In [None]:
# Compare total and urban population over time on one set of axes.
plt.style.use('fivethirtyeight')
plt.figure(figsize=(10, 10))
for column, colour, legend_text in (
    ('Population', 'blue', 'World Population'),
    ('Urban', 'orange', 'Urban Population'),
):
    plt.plot(
        df['Year'],
        df[column],
        linewidth=2,
        linestyle="dashed",
        color=colour,
        label=legend_text,
    )
plt.title('World Population vs Urban Population')

plt.xlabel('Year')
plt.ylabel('Population')
plt.legend()
plt.show()

In [None]:
# Pairwise correlation between total and urban population.
df.loc[:, ['Population', 'Urban']].corr()

In [None]:
# Population and density over time.
# The original cell was titled 'Population and Density' but re-plotted the
# Urban column, duplicating the previous chart. Density is drawn here instead,
# on a secondary y-axis so both curves remain readable on their own scales.
plt.style.use('fivethirtyeight')
fig, ax_population = plt.subplots(figsize=(10, 10))
ax_density = ax_population.twinx()

population_line, = ax_population.plot(
    df['Year'], df['Population'], color='red', label='World Population', linestyle='dashed'
)
density_line, = ax_density.plot(
    df['Year'], df['Density'], color='orange', label='Density', linestyle='dashed'
)

ax_population.set_title('Population and Density')
ax_population.set_xlabel('Year')
ax_population.set_ylabel('Population')
ax_density.set_ylabel('Density')
# Keep a single grid; a second grid on the twin axis would not line up.
ax_density.grid(False)
# The twin axis is drawn on top, so attach the combined legend to it.
ax_density.legend(handles=[population_line, density_line])
plt.show()

In [None]:
# Yearly percentage change in population.
# Plot against the Year column rather than the row index, and label the
# figure so it can be read on its own.
df.plot(x='Year', y='ChangePerc', kind='line', legend=False)
plt.title('Population Change per Year (%)')
plt.xlabel('Year')
plt.ylabel('ChangePerc')
plt.show()

In [None]:
# Bar chart of urban percentage against population density.
fig, ax = plt.subplots(figsize=(10, 10))
ax.bar(df['Density'], df['UrbanPerc'], color='orange')

ax.set_xlabel('Density')
ax.set_ylabel('UrbanPerc')
plt.show()

# **Feature Scaling**

In [None]:
# Target: the Population column. Features: every other column.
y = df['Population']
x = df.drop(columns=['Population'])

In [None]:
# Create the min-max scaler and fit it on the full feature table.
scale = MinMaxScaler().fit(x)
# Echo the fitted scaler as the cell output.
scale

In [None]:
# `scale` is already fitted on `x`, so only transform here; refitting on the
# same data is redundant and produces the same scaled values.
new_x = scale.transform(x)

# **Splitting the Data and Training the Model**

In [None]:
# Hold out 30% of the rows for evaluation. A fixed random_state makes the
# shuffle, and every score and plot derived from it, reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    new_x, y, test_size=0.3, random_state=42
)

In [None]:
# Fit an ordinary least-squares regression on the training split.
model = LinearRegression().fit(x_train, y_train)
# Echo the fitted estimator as the cell output.
model

# **Model Score and Predictions**

In [None]:
# Score of the fitted model on the held-out split.
model.score(X=x_test, y=y_test)

In [None]:
# Score of the fitted model on the data it was trained on.
model.score(X=x_train, y=y_train)

In [None]:
# Predictions for both splits; they are compared against the actual values below.
y_pred_train = model.predict(x_train)
y_pred_test = model.predict(x_test)

In [None]:
# Position of the Year column within the feature table. Looking it up by name
# avoids silently reading the wrong column if Year is not the first feature.
year_col = x.columns.get_loc('Year')

# Actual vs predicted population for the held-out rows, with the year
# recovered by undoing the min-max scaling.
test = pd.DataFrame({
    'Year Test': scale.inverse_transform(x_test)[:, year_col],
    'Y test': y_test,
    'Y test Predicted': y_pred_test
})

# The same comparison table for the training rows.
train = pd.DataFrame({
    'Year Train': scale.inverse_transform(x_train)[:, year_col],
    'Y train': y_train,
    'Y train predicted': y_pred_train
})

# **Comparing Actual and Predicted**

In [None]:
# Order both result tables chronologically so the line plots are drawn left to right.
train = train.sort_values('Year Train')
test = test.sort_values('Year Test')

In [None]:
# Actual (stars) vs predicted (line) population on the held-out split.
# Matplotlib 3.6 renamed the bundled seaborn style to 'seaborn-v0_8' and later
# removed the old 'seaborn' name; fall back to the old name on older versions.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
plt.figure(figsize=(10,10))
plt.scatter(test['Year Test'], test['Y test'], color='b', label='Actual', linewidth=1.8, marker="*")
plt.plot(test['Year Test'], test['Y test Predicted'], color='r', linewidth=1.5, label="predicted")
plt.title("Population prediction on Y test")
plt.xlabel('year')
plt.ylabel('Population')
plt.legend()
plt.show()

In [None]:
# Actual (stars) vs predicted (line) population on the training split.
# Matplotlib 3.6 renamed the bundled seaborn style to 'seaborn-v0_8' and later
# removed the old 'seaborn' name; fall back to the old name on older versions.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
plt.figure(figsize=(10,10))
plt.scatter(train['Year Train'], train['Y train'], color='b', label='Actual', linewidth=1.8, marker="*")
plt.plot(train['Year Train'], train['Y train predicted'], color='r', linewidth=1.5, label="predicted")
plt.title("Population prediction on Y train")
plt.xlabel('year')
plt.ylabel('Population')
plt.legend()
plt.show()

In [None]:
# Show five randomly chosen rows of the test comparison table.
test.sample(n=5)

In [None]:
# Show five randomly chosen rows of the training comparison table.
train.sample(n=5)

# **Saving the Model to a File**

In [None]:
import joblib
# The model was trained on min-max scaled features, so the fitted scaler is
# saved as well; without it the model cannot be applied to raw inputs.
joblib.dump(scale, 'predict-population-scaler')
# Persist the fitted regression model (file name unchanged).
joblib.dump(model,'predict-population-model')

In [None]:
# Shell escape: list the current working directory, where the joblib.dump call above wrote its file.
!ls