In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and echo the full path of every file found.
for folder, _, names in os.walk('/kaggle/input'):
    for path in (os.path.join(folder, name) for name in names):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# **Importing required libraries**

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf

# **Importing the dataset**

In [None]:
# Read the Forbes 2019 college ranking CSV from the Kaggle input directory.
college = pd.read_csv(
    filepath_or_buffer='../input/forbes-americas-top-colleges-2019/ForbesAmericasTopColleges2019.csv'
)

In [None]:
# Preview the first few rows to check that the file parsed as expected.
college.head()

In [None]:
# (rows, columns) of the raw table.
college.shape

In [None]:
# Number of distinct values in each column.
college.nunique()

# **Visualizations**

In [None]:
# Rank against undergraduate population, coloured by acceptance rate,
# with one panel per value of "Public/Private".
sns.relplot(
    x="Rank",
    y="Undergraduate Population",
    hue="Acceptance Rate",
    col="Public/Private",
    data=college,
)

In [None]:
# Regression fit of undergraduate population on total annual cost,
# drawn separately (panel and colour) for each "Public/Private" value.
sns.lmplot(
    x="Total Annual Cost",
    y="Undergraduate Population",
    hue='Public/Private',
    col="Public/Private",
    data=college,
)

In [None]:
# Alumni salary per institution type; semi-transparent points keep overlaps readable.
sns.stripplot(
    data=college,
    x='Public/Private',
    y='Alumni Salary',
    alpha=0.5,
)

In [None]:
# Average grant aid as a function of total annual cost, one line per institution type.
sns.lineplot(
    x='Total Annual Cost',
    y='Average Grant Aid',
    hue='Public/Private',
    data=college,
)

# **Dropping unwanted columns**

In [None]:
# Remove the identifier-style columns and the rank; they are not used as model inputs.
clg = college.drop(['Name', 'City', 'State', 'Website', 'Rank'], axis='columns')

In [None]:
# Preview the frame after the column drop.
clg.head()

# **Checking for null values**

In [None]:
# Missing-value count per column.
clg.isnull().sum()

# **Filling null values using interpolation**

In [None]:
# Fill gaps in each numeric column by interpolating along the row order.
# Each column is handled independently, exactly as eight separate assignments would.
for column in (
    'Net Price',
    'Average Grant Aid',
    'Alumni Salary',
    'Acceptance Rate',
    'SAT Lower',
    'SAT Upper',
    'ACT Lower',
    'ACT Upper',
):
    clg[column] = clg[column].interpolate()

In [None]:
# Re-count missing values per column after the interpolation step.
clg.isnull().sum()

# **Dropping the 'Alumni Salary' column, as we will use it as our dependent variable**

In [None]:
# Feature frame: everything in clg except the target column.
clg_1 = clg.drop('Alumni Salary', axis='columns')

In [None]:
# Preview the feature frame (target column removed).
clg_1.head()

# **Creating Independent Variable (x) and Dependent Variable (y)**

In [None]:
# Feature matrix: every column of clg_1 as a NumPy array.
x = clg_1.values

In [None]:
# Target vector: the 'Alumni Salary' column of clg as a NumPy array.
y = clg.loc[:, 'Alumni Salary'].values

# **Handling the categorical 'Public/Private' column**

In [None]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

In [None]:
# One-hot encode feature column 0 and pass every other column through unchanged.
ct = ColumnTransformer(
    [('encoder', OneHotEncoder(), [0])],
    remainder='passthrough',
)

In [None]:
# Fit the column transformer on x and replace x with the encoded matrix.
# NOTE(review): this rebinds `x` from its own previous value, so the cell is not
# idempotent -- running it a second time would try to encode the already-encoded
# array. Re-run the cell that builds `x` from clg_1 first.
x = np.array(ct.fit_transform(x))

In [None]:
# Inspect the encoded feature matrix.
x

# **Splitting data into training and testing sets**

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the rows for evaluation. A fixed random_state makes the shuffle
# deterministic, so the same rows land in train/test on every Restart & Run All.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# **Scaling x_train and x_test**

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
# Scaler instance; it is fitted on the training split below.
sc = StandardScaler()

In [None]:
# Fit the scaler on the training split only, then standardise that split.
x_train_1 = sc.fit_transform(x_train)

In [None]:
# Reuse the statistics learned from the training split (no refit) so that
# nothing about the test rows leaks into preprocessing.
x_test_1 = sc.transform(x_test)

# **Creating the artificial neural network (ANN) model**

In [None]:
# Seed TensorFlow's global RNG so the layers' random weight initialisation is
# repeatable from run to run (full bit-for-bit determinism may still depend on
# the hardware/backend).
tf.random.set_seed(42)

# Empty sequential model; layers are appended by the ann.add(...) cells that follow.
# Re-run this cell before re-running those cells, otherwise layers accumulate.
ann = tf.keras.models.Sequential()

# **1st Hidden layer**

In [None]:
# First hidden layer: 6 fully connected units with ReLU.
ann.add(tf.keras.layers.Dense(6, activation='relu'))

# **2nd Hidden layer**

In [None]:
# Second hidden layer: 6 fully connected units with ReLU.
ann.add(tf.keras.layers.Dense(6, activation='relu'))

# **3rd Hidden layer**

In [None]:
# Third hidden layer: 6 fully connected units with ReLU.
ann.add(tf.keras.layers.Dense(6, activation='relu'))

# **Output layer**

In [None]:
# Output layer: a single unit with a linear activation for the regression target.
ann.add(tf.keras.layers.Dense(1, activation='linear'))

# **Defining optimizer and loss function**

In [None]:
# Mean-squared-error loss optimised with Adam.
ann.compile(
    loss='mse',
    optimizer='adam',
)

# **Training the ANN model on the training data**

In [None]:
# Train with a held-out validation slice and early stopping instead of a fixed
# 10000 epochs. Training halts once val_loss has not improved for `patience`
# epochs and the best weights seen are restored, which limits over-fitting and
# keeps a full notebook re-run feasible.
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=100,
    restore_best_weights=True,
)

# `history` keeps the per-epoch loss / val_loss for later inspection.
history = ann.fit(
    x_train_1,
    y_train,
    batch_size=32,
    epochs=10000,  # upper bound only; early stopping may end training sooner
    validation_split=0.2,  # last 20% of the training arrays is used for validation
    callbacks=[early_stop],
)

# **Predicting our x_test based on model**

In [None]:
# Predict on the scaled test features (x_test_1 was produced by the fitted scaler).
# NOTE(review): with a single-unit output layer the result is presumably 2-D,
# (n_samples, 1) -- the next cell checks the shape.
y_pred=ann.predict(x_test_1)

In [None]:
# Shape of the prediction array.
y_pred.shape

# **Concatenating y_pred and y_test**

In [None]:
# Side-by-side table: predictions in column 0, actual values in column 1.
predicted_col = y_pred.reshape(-1, 1)
actual_col = y_test.reshape(-1, 1)
print(np.concatenate((predicted_col, actual_col), axis=1))

In [None]:
# Residuals (actual - predicted), one per test row.
# y_test is 1-D while y_pred is a 2-D column, so subtracting them directly would
# broadcast to an (n, n) matrix; flatten the predictions first for an element-wise
# difference.
# NOTE: this rebinds `y`, which earlier held the full target vector. The name is
# kept because the next cell displays `y`; re-run the cell that builds `y` from
# clg before re-running the train/test split.
y = y_test - y_pred.ravel()

In [None]:
# Display `y`, which at this point holds the y_test - y_pred difference assigned
# in the previous cell, not the original target vector.
y

# **Visualizations based on the prediction**

In [None]:
# Actual vs. predicted salary on the test split; labelled axes so the figure can
# be read on its own, consistent with the other prediction plots.
plt.scatter(y_test, y_pred)
plt.xlabel('Actual Alumni Salary')
plt.ylabel('Predicted Alumni Salary')

In [None]:
# Actual (default colour) and predicted (red) salary against one unscaled feature.
# NOTE(review): assumes column 6 of the encoded matrix is 'Total Annual Cost'
# (the one-hot step prepends its columns) -- confirm against clg_1.columns.
plt.scatter(x_test[:, 6], y_test, label='Actual')
plt.scatter(x_test[:, 6], y_pred, color='red', label='Predicted')
plt.xlabel('Total Annual Cost')
plt.ylabel('Alumni Salary')
plt.legend()  # identifies which colour is actual and which is predicted

In [None]:
# Actual (default colour) and predicted (red) salary against one unscaled feature.
# NOTE(review): assumes column 3 of the encoded matrix is 'Student Population'
# (the one-hot step prepends its columns) -- confirm against clg_1.columns.
plt.scatter(x_test[:, 3], y_test, label='Actual')
plt.scatter(x_test[:, 3], y_pred, color='red', label='Predicted')
plt.xlabel('Student Population')
plt.ylabel('Alumni Salary')
plt.legend()  # identifies which colour is actual and which is predicted