### Prediction of Chances of Admit for Graduate Programs in US Universities

### Our Objective:
* Determine the most important factors that contribute to a student's chance of admission, and select the most accurate model to predict the probability of admission.
* The predicted output gives them a fair idea about their admission chances in a particular university. 

### Getting to know the dataset!
GA dataset contains various paraameters which are important for admission into graduate programs in universities. The features included are :
* GRE Scores ( out of 340 ).
* TOEFL Scores ( out of 120 ).
* University Rating ( out of 5 ).
* Statement of Purpose and Letter of Recommendation Strength ( out of 5 ).
* Undergraduate GPA ( out of 10 ).
* Research Experience ( either 0 or 1 ).
* Chance of Admit ( ranging from 0 to 1 ).

### Approach
* Explore our data to check for imbalance and missing values.
* Explore the correlation between various features in the dataset.
* Split the preprocessed dataset into train and test sets respectively.
* Create and Train a AdaBoost Classifier using mlpack.
* We'll perform evaluation on our test set using metrics such as Accuracy, ROC AUC to quantify the performance of out model.

In [None]:
!wget -q http://datasets.mlpack.org/Admission_Predict.csv

In [1]:
// Import necessary library headers.
#include <mlpack/xeus-cling.hpp>
#include <mlpack/core.hpp>
#include <mlpack/core/data/split_data.hpp>
#include <mlpack/methods/decision_tree/decision_tree.hpp>
#include <mlpack/methods/adaboost/adaboost.hpp>

In [2]:
#define WITHOUT_NUMPY 1
#include "matplotlibcpp.h"
#include "xwidgets/ximage.hpp"
#include "../utils/preprocess.hpp"
#include "../utils/plot.hpp"

namespace plt = matplotlibcpp;

In [3]:
using namespace mlpack;

In [4]:
using namespace mlpack::data;

In [5]:
using namespace mlpack::tree;

In [6]:
using namespace mlpack::adaboost;

In [7]:
// Utility functions for evaluation metrics.
double accuracy(const arma::Row<size_t>& yPreds, const arma::Row<size_t>& yTrue)
{
    const size_t correct = arma::accu(yPreds == yTrue);
    return (double)correct / (double)yTrue.n_elem;
}

In [8]:
double precision(const size_t truePos, const size_t falsePos)
{
    return (double)truePos / (double)(truePos + falsePos);
}

In [9]:
double recall(const size_t truePos, const size_t falseNeg)
{
    return (double)truePos / (double)(truePos + falseNeg);
}

In [10]:
double f1score(const size_t truePos, const size_t falsePos, const size_t falseNeg)
{
    double prec = precision(truePos, falsePos);
    double rec = precision(truePos, falseNeg);
    return 2 * (prec * rec) / (prec + rec);
}

In [11]:
void classification_report(const arma::Row<size_t>& yPreds, const arma::Row<size_t>& yTrue)
{
    arma::Row<size_t> uniqs = arma::unique(yTrue);
    std::cout << std::setw(29) << "precision" << std::setw(15) << "recall" 
              << std::setw(15) << "f1-score" << std::setw(15) << "support" 
              << std::endl << std::endl;
    
    for(auto val: uniqs)
    {
        size_t truePos = arma::accu(yTrue == val && yPreds == val && yPreds == yTrue);
        size_t falsePos = arma::accu(yPreds == val && yPreds != yTrue);
        size_t trueNeg = arma::accu(yTrue != val && yPreds != val && yPreds == yTrue);
        size_t falseNeg = arma::accu(yPreds != val && yPreds != yTrue);
        
        std::cout << std::setw(15) << val
                  << std::setw(12) << std::setprecision(2) << precision(truePos, falsePos) 
                  << std::setw(16) << std::setprecision(2) << recall(truePos, falseNeg) 
                  << std::setw(14) << std::setprecision(2) << f1score(truePos, falsePos, falseNeg)
                  << std::setw(16) << truePos
                  << std::endl;
    }
}

In [12]:
!cat Admission_Predict.csv | sed 1d > Admission_Predict_trim.csv

In [13]:
// Load the preprocessed dataset into armadillo matrix.
arma::mat gradData;
data::Load("Admission_Predict_trim.csv", gradData);

In [14]:
std::cout << gradData.submat(0, 0, gradData.n_rows-1, 5).t() << std::endl;

   3.3700e+02   1.1800e+02   4.0000e+00   4.5000e+00   4.5000e+00   9.6500e+00   1.0000e+00   9.2000e-01
   3.2400e+02   1.0700e+02   4.0000e+00   4.0000e+00   4.5000e+00   8.8700e+00   1.0000e+00   7.6000e-01
   3.1600e+02   1.0400e+02   3.0000e+00   3.0000e+00   3.5000e+00   8.0000e+00   1.0000e+00   7.2000e-01
   3.2200e+02   1.1000e+02   3.0000e+00   3.5000e+00   2.5000e+00   8.6700e+00   1.0000e+00   8.0000e-01
   3.1400e+02   1.0300e+02   2.0000e+00   2.0000e+00   3.0000e+00   8.2100e+00            0   6.5000e-01
   3.3000e+02   1.1500e+02   5.0000e+00   4.5000e+00   3.0000e+00   9.3400e+00   1.0000e+00   9.0000e-01



In [15]:
heatmap("Admission_Predict.csv", "coolwarm", "Correlation Heatmap", 1, 10, 10);
auto img = xw::image_from_file("Correlation Heatmap.png").finalize();
img

A Jupyter widget with unique id: 3d0683399be0444db6cabfd421ab7b06

In [16]:
arma::Row<size_t> targets = arma::conv_to<arma::Row<size_t>>::from(gradData.row(7) > 0.8);
gradData.shed_row(gradData.n_rows-1);

In [17]:
// Split the dataset into train and test sets using mlpack.
arma::mat Xtrain, Xtest;
arma::Row<size_t> Ytrain, Ytest;
mlpack::data::Split(gradData, targets, Xtrain, Xtest, Ytrain, Ytest, 0.25);

In [22]:
ID3DecisionStump ds(Xtrain, Ytrain, 2);

In [23]:
AdaBoost<ID3DecisionStump> ab(Xtrain, Ytrain, 2, ds, 50, 1e-10);

In [24]:
arma::Row<size_t> output;
arma::mat probs;
ab.Classify(Xtest, output, probs);

In [25]:
// Save the yTest and probabilities into csv for generating ROC AUC plot.
data::Save("probabilities.csv", probs);
data::Save("ytest.csv", Ytest);

In [26]:
// Model evaluation metrics.
std::cout <<  "Accuracy: " << accuracy(output, Ytest) << std::endl;
classification_report(output, Ytest);

Accuracy: 0.936
                    precision         recall       f1-score        support

              0        0.93            0.99          0.96              96
              1        0.95            0.75          0.84              21


In [27]:
plotRocAUC("ytest.csv", "probabilities.csv", "roc_auc");
auto img = xw::image_from_file("roc_auc.png").finalize();
img

A Jupyter widget with unique id: d47fca14de4e45cfb231f98ae8955503