In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file found.
for root, _subdirs, names in os.walk('/kaggle/input'):
    full_paths = (os.path.join(root, name) for name in names)
    for path in full_paths:
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the dataset from the Kaggle input directory into the raw `brain` frame.
brain=pd.read_csv('/kaggle/input/brain-weight-in-humans/dataset.csv')

#### Age Range: 1 represents over 18 years of age, 2 represents under 18 years of age
#### Gender: 1 represents Male, 2 represents Female

In [None]:
# Preview the first rows to check the columns and how values are encoded.
brain.head()

In [None]:
# Preview the last rows as well.
brain.tail()

In [None]:
# Summarise column dtypes and non-null counts.
brain.info()

In [None]:
# Count missing values in each column (isna is the same check as isnull).
brain.isna().sum()

In [None]:
# Collect any rows that exactly repeat an earlier row; an empty frame means no duplicates.
is_repeat = brain.duplicated()
duplicate = brain[is_repeat]
duplicate

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
# Switch matplotlib to its built-in dark theme for the figures in this notebook.
plt.style.use('dark_background')

In [None]:
# Work on an independent copy: `brain[:]` is only a slice of the original frame,
# so assigning columns on it can raise SettingWithCopyWarning. The numeric `brain`
# frame must stay untouched because the model is trained on it later.
df = brain.copy()

# Gender is coded 1/2 in the raw data; convert it to readable labels for plotting.
# Bracket assignment is used because attribute assignment (df.Gender = ...) only
# works when the column already exists.
gender = {'1': 'Male', '2': 'Female'}
df['Gender'] = df['Gender'].astype(str).map(gender)
df.head()

In [None]:
# Give the age column a shorter name and replace its 1/2 codes with readable labels.
# The labels are rebuilt from the raw codes in `brain` and rename() is used without
# inplace=True, so re-running this cell gives the same result. Previously a re-run
# mapped the already-mapped labels and turned the whole column into NaN.
age = {'1': '18 years and above', '2': '0 to 18 years'}
df = df.rename(columns={'Age Range': 'Age'})
df['Age'] = brain['Age Range'].astype(str).map(age)
df.head()

In [None]:
# Distribution of head size per age group, split by gender.
fig, ax = plt.subplots(figsize=(10, 5))
sns.violinplot(
    data=df,
    x='Age',
    y='Head Size(cm^3)',
    hue='Gender',
    palette='magma',
    ax=ax,
)

#### > People aged 18 years and above have larger head sizes.  
#### > Females have comparatively smaller head sizes than males.

In [None]:
# Brain weight per age group, split by gender; the whiskers expose outliers.
fig, ax = plt.subplots(figsize=(10, 5))
sns.boxplot(
    data=df,
    x='Age',
    y='Brain Weight(grams)',
    hue='Gender',
    palette='Set2',
    ax=ax,
)

#### > Males who are 18 years and above have the highest brain weight.
#### > Females have lower brain weight than males.
#### > Both the 18-and-above group and males below 18 contain outliers in brain weight.

In [None]:
# Histogram of brain weight with a kernel density estimate overlaid.
sns.displot(data=brain, x='Brain Weight(grams)', kde=True)

#### > The above plot shows that brain weight is approximately normally distributed.

In [None]:
# Plot pairwise relationships between all columns of the raw numeric frame.
sns.pairplot(brain)

#### > Brain weight and head size are the most strongly correlated pair of variables.

In [None]:
# Scatter plot of head size against brain weight with a fitted regression line.
sns.lmplot(
    data=brain,
    x='Brain Weight(grams)',
    y='Head Size(cm^3)',
)

In [None]:
# Pairwise correlation matrix of all columns (pandas uses Pearson by default).
brain.corr()

In [None]:
# Plot the correlation matrix, not the raw rows. A heatmap of the raw frame is
# dominated by whichever column has the largest scale and says nothing about how
# the variables relate to each other. Each cell is annotated with its coefficient.
plt.figure(figsize=(10,5))
sns.heatmap(brain.corr(), annot=True, cmap='magma')

In [None]:
# List the column names to choose the features and target from.
brain.columns

In [None]:
# Split the raw numeric frame into the predictor matrix X and the target vector y.
feature_cols = ['Gender', 'Age Range', 'Head Size(cm^3)']
target_col = 'Brain Weight(grams)'

X = brain[feature_cols]
y = brain[target_col]

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 40% of the rows for testing; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=101)

In [None]:
from sklearn.linear_model import LinearRegression


In [None]:
# Linear regression model with scikit-learn's default settings.
lm=LinearRegression()

In [None]:
# Fit on the training split only, so the test split stays unseen for evaluation.
lm.fit(X_train,y_train)

In [None]:
# Intercept term of the fitted model.
lm.intercept_

In [None]:
# One row per feature, showing the coefficient the fitted model assigned to it.
coeff_df = pd.DataFrame({'Coefficient': lm.coef_}, index=X.columns)
coeff_df

#### > Holding all other features fixed, a 1 cm^3 increase in head size is associated with a 0.251846 gram increase in brain weight.

In [None]:
# Predict brain weight for the held-out test rows.
prediction=lm.predict(X_test)

In [None]:
# Actual (x) versus predicted (y) brain weight on the test split, with a fitted trend line.
sns.regplot(x=y_test,y=prediction)

In [None]:
# Distribution of the test-set residuals (actual minus predicted brain weight).
residuals = y_test - prediction
sns.displot(residuals, bins=50, kde=True)

In [None]:
from sklearn import metrics

In [None]:
# Mean absolute error on the test split.
metrics.mean_absolute_error(y_test,prediction)

In [None]:
# Mean squared error on the test split.
metrics.mean_squared_error(y_test,prediction)

In [None]:
# Root mean squared error on the test split: the square root of the mean *squared*
# error. The earlier version took the square root of the mean absolute error,
# which is not a meaningful metric.
np.sqrt(metrics.mean_squared_error(y_test, prediction))

In [None]:
# LinearRegression.score returns the coefficient of determination (R^2), not a
# classification accuracy, so the printed value is labelled accordingly.
print("Train R^2:", lm.score(X_train, y_train))

In [None]:
# R^2 (coefficient of determination) of the fitted model on the held-out test split.
lm.score(X_test,y_test)