# Project 01: Campus Recruitment Data Analysis
## SAS programs and outputs
### 01. Data Exploration

In [None]:
/* Assign the PROJECT library to the folder that holds the placement data */
libname project '/folders/myfolders/Project/placement';

/* Import the CSV file into PROJECT.PLACEMENT (REPLACE overwrites an existing copy). */
/* GUESSINGROWS=MAX makes PROC IMPORT scan every row before it chooses variable */
/* types and lengths. By default only the first rows are scanned, so a longer */
/* character value that first appears further down the file could be truncated. */
proc import 
		datafile='/folders/myfolders/Project/placement/placement_data.csv' 
		out=project.placement dbms=csv replace;
	guessingrows=max;
run;

/* Check the characteristics of the dataset and confirm it was read correctly */
proc contents data=project.placement;
run;

proc print data=project.placement noobs;
	title "Campus Recruitment";
run;

/* Check the frequencies of the categorical variables in the dataset. */
/* NLEVELS adds a count of distinct levels per variable; ORDER=FREQ lists */
/* the levels from most to least frequent. */
proc freq data=project.placement nlevels order=freq;
	table gender ssc_b hsc_b hsc_s workexp degree_t specialisation status;
	title "Frequencies of Categorical Variables";
run;

/* Clear the title so it does not carry over into later output */
title;

### 02. Graphical Analysis

In [None]:
/* Identify associations in ANOVA with box plots */
/* between salary and the categorical variables */
/* Macro for graphical analysis connecting the group means */
/* Part A */

%let categorical = gender hsc_s degree_t workexp specialisation;

/* %box: draws one vertical box plot of &response for each variable in &regressor. */
/*   response=  analysis variable; rows where it is missing (.) are excluded */
/*   regressor= blank-separated list of category variables, one plot per variable */
%macro box(response=, regressor=);

	/* Keep the loop variables local so the macro cannot overwrite global */
	/* macro variables that happen to share these names */
	%local i var;

	%let i = 1;
	%let var = %scan(&regressor, &i, %str( ));

	/* %scan returns an empty string once the list is exhausted */
	%do %while(%length(&var) > 0);

		proc sgplot data = project.placement;
			where &response ne .;
			vbox &response / category = &var connect = mean;
			title "&response differences across &var";
		run; /* explicit step boundary for every plot */

		%let i = %eval(&i + 1);
		%let var = %scan(&regressor, &i, %str( ));

	%end;

	/* Reset the title after the last plot */
	title;

%mend box;
%box(response=salary, regressor=&categorical);


/* Part B: look for linear associations between salary and the */
/* continuous variables using scatter plots with a fitted regression line. */
/* Rows with a missing salary are left out of every plot. */

/* Single plot: salary against mba_p */
title "Association of MBA % with Salary";
proc sgscatter data=project.placement(where=(salary ne .));
	plot salary*mba_p / reg;
run;
title;


/* Panel of plots: salary against each variable listed in &factor2 */
%let factor2 = ssc_p hsc_p degree_P ptest_p mba_p;

title "Association of Continuous Variables with Salary";
proc sgscatter data=project.placement(where=(salary ne .));
	plot salary*(&factor2) / reg;
run;
title;

### 03. One-Way ANOVA

In [None]:
/* Performing a one-way ANOVA to identify the differences */
/* between the mean salary of students who get employed */
/* based on each categorical variable. */
/* Macro usage */

ods graphics;

%let category = gender hsc_s degree_t workexp specialisation;

/* %oneway: fits one one-way ANOVA (PROC GLM) per variable in &predictor. */
/*   response=  dependent variable; rows where it is missing (.) are excluded */
/*   predictor= blank-separated list of classification variables, one model each */
%macro oneway(response=, predictor=);

	/* Keep the loop variables local so the macro cannot overwrite global */
	/* macro variables that happen to share these names */
	%local i factor;

	%let i = 1;
	%let factor = %scan(&predictor, &i, %str( ));

	/* %scan returns an empty string once the list is exhausted */
	%do %while(%length(&factor) > 0);

		proc glm data=project.placement plots=diagnostics;
			where &response ne .;
			class &factor;
			model &response = &factor;
			/* Levene's test checks the equal-variance assumption across groups */
			means &factor / hovtest=levene;
			title "One-Way ANOVA with &factor as Predictor";
		run;
		quit; /* PROC GLM is interactive: end each step explicitly */

		%let i = %eval(&i + 1);
		%let factor = %scan(&predictor, &i, %str( ));

	%end;

	/* Reset the title after the last model */
	title;

%mend oneway;
%oneway(response=salary, predictor=&category);

### 04. Correlation Analysis

In [None]:
/* Producing correlation statistics and scatter plots */
/* of each continuous variable against salary */
/* Part A */

%let interval = ssc_p hsc_p degree_p ptest_p mba_p;

/* IMAGEMAP enables data tips on the plots; the ID variable supplies their label */
ods graphics / reset=all imagemap;
proc corr data=project.placement 
			   rank plots(only)=scatter(nvar=all ellipse=none);
	where salary ne .;
	var &interval;
	with salary;
	id sl_no;
	title "Correlations and Scatter Plots with Salary";
run;
quit;
title;


/* Producing correlation statistics among the potential predictor variables */
/* NOSIMPLE suppresses the descriptive statistics; BEST=3 keeps the three */
/* strongest correlations for each variable. Graphics are switched off here. */
/* Part B */

ods graphics off;
proc corr data=project.placement nosimple best=3;
	where salary ne .;
	var &interval;
	title "Correlations and Scatter Plot Matrix of Predictors";
run;

/* Reset the title, as every other section does, so it cannot leak into later output */
title;

### 05. Simple Linear Regression

In [None]:
/* Performing simple linear regression models */
/* using each continuous variable as predictor and salary as response */
/* Part A */

/* The list holds continuous variables, so it gets its own name instead of */
/* overwriting the global CATEGORICAL list used by the box-plot section. */
%let continuous = ssc_p hsc_p degree_p ptest_p mba_p;

/* %simple: fits one simple linear regression (PROC REG) per variable in &predictor. */
/*   response=  dependent variable; rows where it is missing (.) are excluded */
/*   predictor= blank-separated list of regressors, one model per variable */
%macro simple(response=, predictor=);

	/* Keep the loop variables local so the macro cannot overwrite global */
	/* macro variables that happen to share these names */
	%local i var;

	%let i = 1;
	%let var = %scan(&predictor, &i, %str( ));

	/* %scan returns an empty string once the list is exhausted */
	%do %while(%length(&var) > 0);

		proc reg data=project.placement;
			where &response ne .;
			model &response = &var;
			title "Simple Regression with &var as Regressor";
		run;
		quit; /* PROC REG is interactive: end each step explicitly */

		%let i = %eval(&i + 1);
		%let var = %scan(&predictor, &i, %str( ));

	%end;

	/* Reset the title after the last model */
	title;

%mend simple;
%simple(response=salary, predictor=&continuous);


/* Part B: simple linear regression of salary on mba_p, */
/* with ODS Graphics switched on for the step. */
/* Rows with a missing salary are excluded. */

ods graphics on;

title "Simple Regression with MBA_P as Regressor";
proc reg data=project.placement(where=(salary ne .));
	model salary = mba_p;
run;
quit;
title;

### 06. Categorical Analysis

In [None]:
/* Part A: examine the distribution of the categorical variables */
/* gender, work experience and degree type, on their own and */
/* cross-tabulated with the response variable status. */
/* Only frequency plots (percent scale) are requested as graphics. */
/* NOTE(review): the plots rely on ODS Graphics being enabled by an earlier step. */

proc freq data=project.placement;
	tables status gender workexp degree_t
		   (gender workexp degree_t)*status
		   / plots(only)=freqplot(scale=percent);
run;


/* Part B: examine the distribution of the continuous variables */
/* placement percentage, degree percentage and MBA percentage */
/* within each level of the response variable status. */
/* NOPRINT suppresses the tables, so only the histograms with their */
/* inset summary statistics are produced. */

proc univariate noprint data=project.placement;
	var ptest_p degree_p mba_p;
	class status;
	histogram ptest_p degree_p mba_p;
	inset mean std median min max / position=ne format=5.2;
run;

### 07. Tests of Associations

In [None]:
/* Formal tests of whether work experience and gender */
/* are significantly associated with status. */
/* Each two-way table reports chi-square tests, expected counts, */
/* cell chi-square contributions and relative-risk estimates; */
/* column and overall percentages are suppressed. */

ods graphics off;

title "Associations with Status";
proc freq data=project.placement;
	tables workexp*status gender*status
		   / chisq relrisk expected cellchi2 nocol nopercent;
run;
title;

### 08. Fitting Logistic Regression

In [None]:
/* Binary logistic regression describing the relationship */
/* between Status (response) and Degree percentage (predictor). */

/* $JOB recodes the Status text so the event can be referenced as '1' */
proc format;
	value $job
		'Placed'     = 1
		'Not Placed' = 0;
run;

ods graphics on;

title "LOGISTIC MODEL (1): Status = Degree_P";
proc logistic data=project.placement
			  plots(only)=(effect oddsratio)
			  alpha=0.05;
	format Status $job.;
	/* Model the probability of the formatted value '1'; */
	/* CLODDS=PL requests profile-likelihood limits for the odds ratio */
	model Status(event='1') = Degree_P / clodds=pl;
run;
title;

ods graphics off;

### 09. Fitting Multiple Logistic Regression

In [None]:
/* Part A: multiple logistic regression describing the relationship */
/* between Status and the predictors Degree_P, Gender and WorkExp. */

/* $JOB recodes the Status text so the event can be referenced as '1' */
proc format;
	value $job
		'Placed'     = 1
		'Not Placed' = 0;
run;

ods graphics on;

title "LOGISTIC MODEL (2): Status = Degree_P Gender WorkExp";
proc logistic data=project.placement plots(only)=(effect oddsratio);
	format Status $job.;
	/* Reference-cell coding with 'F' and 'No' as the baseline levels */
	class Gender(ref='F') WorkExp(ref='No') / param=ref;
	model Status(event='1') = Degree_P Gender WorkExp / clodds=pl;
	/* Report the Degree_P odds ratio per 10-unit change */
	units Degree_P=10;
run;
title;

ods graphics off;


/* Fitting a multiple logistic regression model */
/* with interactions between predictor variables */
/* Part B */

/* $JOB recodes the Status text so the event can be referenced as '1' */
proc format;
	value $job 'Placed' = 1
			   'Not Placed' = 0;
run;

ods graphics on;

proc logistic data=project.placement plots(only)=(effect oddsratio);
	/* Reference-cell coding with 'F' and 'No' as the baseline levels */
	class Gender(ref='F') WorkExp(ref='No') / param=ref;
	/* The bar notation with @2 expands to all main effects and two-way */
	/* interactions; backward elimination removes effects whose p-value */
	/* exceeds SLSTAY=0.10 */
	model Status(event='1') = Degree_P|Gender|WorkExp @2 /
		  selection=backward clodds=pl slstay=0.10;
	/* Report the Degree_P odds ratio per 10-unit change */
	units Degree_P=10;
	format Status $job.;
	/* Two title lines: adjacent quoted strings in one TITLE statement are */
	/* joined with no separator, which ran the two parts of the text together */
	title "LOGISTIC MODEL (3): Backward Elimination";
	title2 "Status = Degree_P|Gender|WorkExp";
run;
title; /* clears TITLE and TITLE2 */

ods graphics off;


/* Part C: refit the final model and request odds ratios for each */
/* value of the variables that are involved in an interaction. */

/* $JOB recodes the Status text so the event can be referenced as '1' */
proc format;
	value $job
		'Placed'     = 1
		'Not Placed' = 0;
run;

ods graphics on;

title 'LOGISTIC MODEL (3.1): Status = Degree_p|Gender WorkExp';
proc logistic data=project.placement plots(only)=oddsratio(range=clip);
	format Status $job.;
	/* Reference-cell coding with 'F' and 'No' as the baseline levels */
	class Gender(ref='F') WorkExp(ref='No') / param=ref;
	/* Degree_P|Gender expands to both main effects plus their interaction */
	model Status(event='1') = Degree_P|Gender WorkExp;
	/* Report the Degree_P odds ratio per 10-unit change */
	units Degree_P=10;
	/* Degree_P odds ratio at every Gender level, and Gender odds ratio */
	/* at Degree_P = 50 and 70, each with profile-likelihood limits */
	oddsratio Degree_P / at (Gender=all) cl=pl;
	oddsratio Gender / at (Degree_P= 50 70) cl=pl;
run;
title;

ods graphics off;

### 10. Generating Predictions

In [None]:
/* Generating predictions, step 1: refit the final model and save it */
/* in the item store JOBOFFER so it can be used for scoring later. */

/* $JOB recodes the Status text so the event can be referenced as '1' */
proc format;
	value $job
		'Placed'     = 1
		'Not Placed' = 0;
run;

/* Suppress all displayed output while the model is refitted */
ods select none;

proc logistic data=project.placement;
	format Status $job.;
	class Gender(ref='F') WorkExp(ref='No') / param=ref;
	model Status(event='1') = Degree_P|Gender WorkExp;
	units Degree_P=10;
	store out=joboffer;
run;

/* Turn displayed output back on */
ods select all;


/* Input new data: one candidate per record, read with list input as */
/* Degree_P (numeric), Gender and WorkExp (character, length 3). */
/* The data records start in column 1. List input separates fields on */
/* blanks only, so a leading tab would be read as part of the first */
/* field and could make Degree_P invalid numeric data. */
data newcandid;	
	length Gender $3 WorkExp $3;
	input Degree_P Gender $ WorkExp $;
	datalines;
51 F Yes
49 M Yes
68 F No
63 M No
82 F Yes
76 M Yes
79 M No
87 F No
;
run;


/* Score the new data with the model saved in the item store JOBOFFER. */
/* ILINK applies the inverse link function, so the predicted values in */
/* SCORED_CANDID are on the probability scale rather than the logit scale. */
proc plm restore=joboffer;
	score data=newcandid out=scored_candid / ILINK;
	title 'Predictions using PROC PLM';
run;

/* List the scored candidates (the title above still applies here) */
proc print data=scored_candid;
run;

/* Reset the title, as the other sections do, so it cannot leak into later output */
title;