In [None]:
import warnings
warnings.simplefilter('ignore')

In [None]:
import math
import operator
import os
import time
from csv import reader
from math import exp
from random import random
from random import randrange
from random import seed

import numpy as np
import pandas as pd
from tqdm import tqdm

# Module-level state shared between main(), back_propagation() and the results cell.
a = ''        # path of the CSV file to predict on; main() stores the user's answer here
r = []        # predictions produced by the most recently completed phase
network = []  # most recently trained network; rebound by back_propagation()

# Load a CSV file
def load_csv(filename):
    """Read a CSV file into a list of rows, discarding each row's first column.

    Args:
        filename: path of the CSV file to read.

    Returns:
        A list of rows; every row is the list of string fields from the file
        with the first field removed (``row[1:]``). Completely empty lines are
        skipped. The header line, if any, is returned like any other row.
    """
    dataset = []
    # newline='' hands line-ending handling to the csv module, so quoted fields
    # containing line breaks are parsed correctly on every platform.
    with open(filename, 'r', newline='') as file:
        for row in reader(file):
            if not row:
                continue
            dataset.append(row[1:])
    return dataset

def str_column_to_float(dataset, column):
    """Convert the string stored at index ``column`` of every row to a float, in place.

    Surrounding whitespace is stripped before conversion. Returns None.
    """
    for record in dataset:
        text = record[column]
        record[column] = float(text.strip())

# Convert string column to integer
def str_column_to_int(dataset, column):
    """Replace the values in one column with integer codes, in place.

    Codes are assigned in sorted order of the distinct values, so the mapping
    is the same on every run. (Enumerating a bare ``set`` of strings gives an
    order that can change between interpreter runs, which would silently swap
    the meaning of class 0 and class 1.)

    Args:
        dataset: list of rows (lists); modified in place.
        column: index of the column to encode.

    Returns:
        dict mapping each original value to the integer code that replaced it.
    """
    class_values = [row[column] for row in dataset]
    try:
        ordered = sorted(set(class_values))
    except TypeError:
        # Values that cannot be ordered against each other: fall back to
        # first-appearance order, which is still deterministic for a given file.
        ordered = list(dict.fromkeys(class_values))
    lookup = {value: code for code, value in enumerate(ordered)}
    for row in dataset:
        row[column] = lookup[row[column]]
    return lookup

# Find the min and max values for each column
def dataset_minmax(dataset):
    """Return ``[min, max]`` for every column of ``dataset``.

    Args:
        dataset: list of equally long rows.

    Returns:
        A list with one ``[min, max]`` pair per column, in column order.
        An empty dataset yields an empty list.
    """
    # zip(*dataset) transposes the rows so each tuple holds one column.
    return [[min(column), max(column)] for column in zip(*dataset)]

# Rescale dataset columns to the range 0-1
def normalize_dataset(dataset, minmax):
    """Min-max scale every column except the last one, in place.

    Args:
        dataset: list of rows; the last element of each row is left untouched.
        minmax: per-column ``[min, max]`` pairs, indexed like the row.

    A column whose min equals its max has no spread to scale by; its values
    are set to 0.0 instead of raising ZeroDivisionError.
    """
    for row in dataset:
        for i in range(len(row) - 1):
            low = minmax[i][0]
            span = minmax[i][1] - low
            row[i] = (row[i] - low) / span if span else 0.0

# Split a dataset into k folds
def cross_validation_split(dataset, n_folds):
    """Randomly deal the rows of ``dataset`` into ``n_folds`` equally sized folds.

    Each fold holds ``int(len(dataset) / n_folds)`` rows drawn without
    replacement; rows left over after the division are not placed in any fold.
    The input list itself is not modified.
    """
    pool = list(dataset)
    rows_per_fold = int(len(dataset) / n_folds)
    folds = []
    for _ in range(n_folds):
        # One randrange draw per row, always against the shrinking pool.
        picked = [pool.pop(randrange(len(pool))) for _ in range(rows_per_fold)]
        folds.append(picked)
    return folds

# Calculate accuracy percentage
def accuracy_metric(actual, predicted):
    """Return the percentage of positions where ``predicted`` equals ``actual``."""
    total = len(actual)
    hits = sum(1 for idx in range(total) if actual[idx] == predicted[idx])
    return hits / float(total) * 100.0

# Evaluate an algorithm using a cross validation split
def evaluate_algorithm(dataset, n_folds, *args):
    """Score back_propagation with k-fold cross validation.

    The dataset is split into ``n_folds`` folds. Each fold in turn is held out
    as the test set (with its class label blanked to None) while the remaining
    folds are concatenated into the training set.

    Args:
        dataset: list of rows whose last element is the class label.
        n_folds: number of folds to create.
        *args: forwarded to back_propagation after the train and test sets
            (the caller in this file passes l_rate, n_epoch, n_hidden).

    Returns:
        List with one accuracy percentage per fold.
    """
    folds = cross_validation_split(dataset, n_folds)
    scores = []
    for fold_number, fold in enumerate(folds):
        # Training rows are every row from every fold except the held-out one.
        train_set = [row for other in folds if other is not fold for row in other]
        test_set = []
        for row in fold:
            row_copy = list(row)
            row_copy[-1] = None  # hide the label from the model
            test_set.append(row_copy)
        # Progress-bar caption. The first two captions are kept exactly as
        # before; later folds previously had no caption at all, which raised
        # UnboundLocalError whenever n_folds was greater than 2.
        if fold_number == 0:
            label = 'training model'
        elif fold_number == 1:
            label = 'testing model '
        else:
            label = 'fold %d model  ' % (fold_number + 1)
        predicted = back_propagation(train_set, test_set, *args, label)
        actual = [row[-1] for row in fold]
        scores.append(accuracy_metric(actual, predicted))
    return scores

# Calculate neuron activation for an input
def activate(weights, inputs):
    """Return the weighted sum of ``inputs`` plus the bias.

    The last entry of ``weights`` is the bias; the entries before it are
    paired with ``inputs`` by position. Extra trailing inputs are ignored.
    """
    total = weights[-1]
    for position, weight in enumerate(weights[:-1]):
        total += weight * inputs[position]
    return total

# Transfer neuron activation
def transfer(activation):
    """Sigmoid transfer function: ``1 / (1 + e^-activation)``.

    For very negative activations ``exp(-activation)`` exceeds the float range
    and math.exp raises OverflowError; the sigmoid's value there is 0.0 to
    float precision, so that is returned instead of crashing.
    """
    try:
        return 1.0 / (1.0 + exp(-activation))
    except OverflowError:
        return 0.0

# Forward propagate input to a network output
def forward_propagate(network, row):
    """Feed ``row`` through every layer and return the last layer's outputs.

    Each neuron's result is also stored on the neuron under the 'output' key.
    With an empty network the row itself is returned.
    """
    signal = row
    for layer in network:
        layer_outputs = []
        for neuron in layer:
            neuron['output'] = transfer(activate(neuron['weights'], signal))
            layer_outputs.append(neuron['output'])
        # The outputs of this layer are the inputs of the next one.
        signal = layer_outputs
    return signal

# Calculate the derivative of a neuron output
def transfer_derivative(output):
    """Return ``output * (1 - output)``, the slope of the sigmoid at that output."""
    complement = 1.0 - output
    return output * complement

# Backpropagate error and store in neurons
def backward_propagate_error(network, expected):
    """Walk the layers from last to first and store a 'delta' on every neuron.

    For the last layer the error of neuron j is ``expected[j] - output``. For
    earlier layers it is the sum, over the neurons of the following layer, of
    that neuron's j-th weight times its delta. Each delta is the error times
    transfer_derivative of the neuron's stored output.
    """
    last = len(network) - 1
    for depth in range(last, -1, -1):
        layer = network[depth]
        if depth == last:
            errors = [expected[k] - layer[k]['output'] for k in range(len(layer))]
        else:
            errors = []
            for k in range(len(layer)):
                back_signal = 0.0
                for downstream in network[depth + 1]:
                    back_signal += downstream['weights'][k] * downstream['delta']
                errors.append(back_signal)
        for k, neuron in enumerate(layer):
            neuron['delta'] = errors[k] * transfer_derivative(neuron['output'])

# Update network weights with error
def update_weights(network, row, l_rate):
    """Nudge every weight by ``l_rate * delta * input`` and every bias by ``l_rate * delta``.

    The first layer takes its inputs from ``row`` without its last element;
    every later layer takes the stored 'output' values of the layer before it.
    Weights are modified in place.
    """
    for depth, layer in enumerate(network):
        if depth == 0:
            inputs = row[:-1]
        else:
            inputs = [previous['output'] for previous in network[depth - 1]]
        for neuron in layer:
            weights = neuron['weights']
            step = l_rate * neuron['delta']
            for position, value in enumerate(inputs):
                weights[position] += step * value
            weights[-1] += step  # bias has an implicit input of 1

# Train a network for a fixed number of epochs
def train_network(network, train, l_rate, n_epoch, n_outputs,v):
    """Train ``network`` in place, one row at a time, for ``n_epoch`` passes.

    For every row: run a forward pass, build a one-hot target from the row's
    last element (used as the index of the 1), back-propagate the error and
    update the weights. ``v`` is the caption shown on the tqdm progress bar,
    which advances once per processed row.
    """
    total_steps = len(train) * n_epoch
    with tqdm(total = total_steps, desc= v , bar_format = "{l_bar}{bar} [time left: {remaining} ]") as progress:
        for _ in range(n_epoch):
            for sample in train:
                # Called for its side effect: it stores each neuron's 'output'.
                forward_propagate(network, sample)
                target = [0] * n_outputs
                target[sample[-1]] = 1
                backward_propagate_error(network, target)
                update_weights(network, sample, l_rate)
                progress.update(1)

# Initialize a network
def initialize_network(n_inputs, n_hidden, n_outputs):
    """Build a network with one hidden layer and one output layer.

    Every neuron is a dict with a 'weights' list holding one random weight per
    input plus one extra entry for the bias. Hidden-layer weights are drawn
    first, then output-layer weights.
    """
    def build_layer(n_neurons, fan_in):
        # fan_in + 1: the extra weight is the bias.
        return [{'weights': [random() for _ in range(fan_in + 1)]} for _ in range(n_neurons)]

    hidden_layer = build_layer(n_hidden, n_inputs)
    output_layer = build_layer(n_outputs, n_hidden)
    return [hidden_layer, output_layer]

# Make a prediction with a network
def predict(network, row):
    """Return the index of the output neuron with the largest activation for ``row``."""
    activations = forward_propagate(network, row)
    strongest = max(activations)
    return activations.index(strongest)

# Backpropagation Algorithm With Stochastic Gradient Descent
def back_propagation(train, test, l_rate, n_epoch, n_hidden,v):
    """Train a fresh network on ``train`` and return its predictions for ``test``.

    The network is stored in the module-level ``network`` variable so callers
    can keep using it after this function returns. The input size is the row
    length minus the label; the output size is the number of distinct labels
    found in ``train``. ``v`` is passed through as the progress-bar caption.
    """
    global network
    n_inputs = len(train[0]) - 1
    n_outputs = len({row[-1] for row in train})
    network = initialize_network(n_inputs, n_hidden, n_outputs)
    train_network(network, train, l_rate, n_epoch, n_outputs, v)
    return [predict(network, row) for row in test]


In [None]:
#main function
def main():
    """Interactively build the three phase models and print their predictions.

    Steps:
      1. Load 'p1.csv', 'p2.csv' and 'p3.csv', drop their header rows, convert
         the feature columns to float, encode the label column as int and
         min-max normalise the features.
      2. Ask whether to predict for a whole file (choice 0) or for a single,
         manually entered record (choice 1).
         - Choice 0 reads the named CSV, derives the 'Marks2' and 'comp'
           columns and writes 'pd3.csv', 'pd2.csv' and 'pd1.csv', each with
           fewer columns than the one before.
         - Choice 1 prompts for every field of one record.
      3. For each phase, cross-validate a network on that phase's training
         data, print the scores, then predict with the module-level
         ``network`` (back_propagation rebinds it on every fold, so it is the
         model trained during the last fold). Between phases the user may
         stop by entering 'x'.

    Side effects: sets the module globals ``a`` (prediction file path) and
    ``r`` (predictions of the last completed phase in file mode); writes the
    pd*.csv files in file mode. Returns None.
    """
    # load the training datasets
    data3 = load_csv('p3.csv')
    data2 = load_csv('p2.csv')
    data1 = load_csv('p1.csv')
    # remove the header row of every dataset
    data1.pop(0)
    data2.pop(0)
    data3.pop(0)

    # convert every feature column (all but the last) to float
    # for data 1
    for i in range(len(data1[0])-1):
        str_column_to_float(data1,i)
    # for data 2
    for i in range(len(data2[0])-1):
        str_column_to_float(data2,i)
    # for data 3
    for i in range(len(data3[0])-1):
        str_column_to_float(data3,i)

    # convert the class label column (the last one) to int
    # for data 1
    str_column_to_int(data1,len(data1[0])-1)
    # for data 2
    str_column_to_int(data2,len(data2[0])-1)
    # for data 3
    str_column_to_int(data3,len(data3[0])-1)

    # normalize input variables; the min/max pairs are kept so the prediction
    # data can later be scaled with the same ranges
    # for data 1
    minmax1 = dataset_minmax(data1)
    normalize_dataset(data1,minmax1)
    # for data 2
    minmax2 = dataset_minmax(data2)
    normalize_dataset(data2,minmax2)
    # for data 3
    minmax3 = dataset_minmax(data3)
    normalize_dataset(data3,minmax3)

    # evaluate algorithm variables
    n_folds =2
    l_rate = 0.4
    n_epoch =500
    n_hidden =5

    # options
    try:
        c = int(input("enter the choice \n0) predicting for more.\n1) predicting for a single.\n\n   "))
    except ValueError:
        c = None  # non-numeric answer: handled as an invalid choice below
    if c == 0:
        flag = 0
    elif c==1:
        flag = 1
    else:
        print('choice a valid option:')
        # Start over, then stop: falling through here would continue with
        # `flag` unassigned and raise UnboundLocalError.
        main()
        return

    global a
    if flag == 0:
        a = input("enter the file name for predicting:")
        dataset = pd.read_csv(a)
        # absent ("ABS") or blank marks count as 0
        dataset['Marks']=dataset['Marks'].replace("ABS",0)
        dataset['Marks2']=dataset['Marks2'].replace("ABS",0)
        dataset['Marks']=dataset['Marks'].replace("",0)
        dataset['Marks2']=dataset['Marks2'].replace("",0)
        d1 = dataset[["Marks","Marks2","comp"]]

        dataset.drop(dataset.columns[[0,1,3,8]], axis = 1, inplace = True)
        try:
            d1[["Marks", "Marks2", "comp"]] = d1[["Marks","Marks2","comp"]].apply(pd.to_numeric)
            # Marks2 becomes 40% of (2 * higher mark + half of the lower mark)
            d1['Marks2'] = np.where( (d1['Marks'] >= d1['Marks2']),(((2*d1['Marks'] + (d1['Marks2']/2)) * 40) / 100),(((2*d1['Marks2'] + (d1['Marks']/2)) * 40) / 100) )
            d1['comp'] = (d1['Marks2']/40)*30 + d1['comp']
            dataset['Marks2'] = d1['Marks2']
            dataset['comp'] = d1['comp']
        except Exception:
            # Best effort: when the marks cannot be converted the columns are
            # left as read. Exception (not a bare except) so Ctrl-C still works.
            print('Student Acadamic prediction')

        # Each phase file has fewer columns than the previous one.
        # NOTE(review): which columns the positional indexes refer to depends
        # on the layout of the input file - confirm against predictData.csv.
        dataset.to_csv('pd3.csv')
        dataset.drop(dataset.columns[[4,21]], axis = 1, inplace =True)

        dataset.to_csv('pd2.csv')

        dataset.drop(dataset.columns[[3,19]], axis = 1, inplace = True)

        dataset.to_csv('pd1.csv')
    else:
        h = list()
        j = ['Course_code','sem','Student_id','age','Marks','Marks2','comp','health_issues','sports','events','inter_score','school','social_acc','age/sem','batch','gender','late_assSub','organizing','coding_skill','speaking_skill','backlogs','att1','att2','att3']
        for i in j:
            if i == 'age/sem':
                # derived field: age (h[3]) divided by sem (h[1])
                h.append(h[3]/h[1])
            elif i == 'Course_code' or i == 'Student_id':
                print('enter field ',i,': ')
                h.append(input())
            else:
                print('enter field ',i,': ')
                h.append(float(input()))
        # drop the two identifier strings: Course_code, then Student_id
        # (which sits at index 1 once Course_code is gone)
        h.pop(0)
        h.pop(1)

    global r
    # phase 1
    print('\n\nbuilding phase1 model:\n')
    scores = evaluate_algorithm(data1,n_folds,l_rate, n_epoch, n_hidden)
    print("scores: %s" %scores)
    print("mean Acuracy: %.3f%%" % (sum(scores)/float(len(scores))))

    if flag ==0:
        # load the phase 1 prediction data
        pdata = load_csv('pd1.csv')
        pdata.pop(0)

        # convert the feature columns to float
        for i in range(len(pdata[0])-1):
            str_column_to_float(pdata,i)

        # normalize with the ranges of the phase 1 training data
        normalize_dataset(pdata,minmax1)

        r1 = list()

        for i in pdata:
            k = predict(network,i)
            r1.append(k)
        r = r1
        print(r1)
    else:
        # phase 1 uses every entered field except the last two (att2, att3)
        # NOTE(review): unlike the file path above, these manually entered
        # values are passed to the network without normalisation - confirm
        # this is intended.
        u = h[:-2]
        print(predict(network,u))

    while(1):
        o = input("enter c to continue or enter x to exit\n\t")
        if o == 'c' or o == 'C':
            print("\nphase 2 will include marks and attendance up to mid term 2 ")
            break
        elif o == 'x' or o == 'X':
            return

    #phase 2
    print('\nbuilding phase2 model:\n')
    scores = evaluate_algorithm(data2,n_folds,l_rate, n_epoch, n_hidden)
    print("scores: %s" %scores)
    print("mean Acuracy: %.3f%%" % (sum(scores)/float(len(scores))))

    if flag == 0:
        # load the phase 2 prediction data
        pdata = load_csv('pd2.csv')
        pdata.pop(0)

        # convert the feature columns to float
        for i in range(len(pdata[0])-1):
            str_column_to_float(pdata,i)

        # normalize with the ranges of the phase 2 training data
        normalize_dataset(pdata,minmax2)

        r2 = list()

        for i in pdata:
            k = predict(network,i)
            r2.append(k)
        r = r2
        print(r2)
    else:
        # phase 2 uses every entered field except the last one (att3)
        u = h[:-1]
        print(predict(network,u))

    while(1):
        o = input('enter c to continue or enter x to exit\n\t')
        if o == 'c' or o == 'C':
            print('\nphase 3 will include marks and attendance upto end of the semester attendance')
            break
        elif o == 'x' or o == 'X':
            return

    #phase 3
    print('\nbuilding phase3 model:\n')
    scores = evaluate_algorithm(data3,n_folds,l_rate, n_epoch, n_hidden)
    print("scores: %s" %scores)
    print("mean Acuracy: %.3f%%" % (sum(scores)/float(len(scores))))

    if flag == 0:
        # load the phase 3 prediction data
        pdata = load_csv('pd3.csv')
        pdata.pop(0)

        # convert the feature columns to float
        for i in range(len(pdata[0])-1):
            str_column_to_float(pdata,i)

        # normalize with the ranges of the phase 3 training data
        normalize_dataset(pdata,minmax3)

        r3 = list()

        for i in pdata:
            k = predict(network,i)
            r3.append(k)
        r = r3
        print(r3)
    else:
        # phase 3 uses every entered field
        print(predict(network,h))


In [None]:
main()

# Build the per-student result table from the predictions that main() left in
# the module globals: `a` (path of the prediction file) and `r` (predictions).
s = list()
# `a` is still empty when main() ran in single-record mode; there is no file
# to annotate then, and pd.read_csv('') would only raise.
if a:
    dataset = pd.read_csv(a)
    # Keep only the columns at positions 0 and 3 (plus any beyond 25); in the
    # captured run these are Course_code and Student_id.
    dataset.drop(dataset.columns[[1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25]], axis = 1, inplace = True)

    dataset['final_results'] = r
    dataset['final_results'] = dataset['final_results'].replace(1,"pass")
    dataset['final_results'] = dataset['final_results'].replace(0,"fail")
    f = ['need to do hardwork','improve presentation skills','have to study more']

    # One suggestion per prediction: none for class 1, a randomly picked one otherwise.
    for i in r:
        if  i == 1:
            s.append('no suggestions')
        else:
            s.append(f[math.floor(random()*len(f))])

    dataset.insert(3,'SUGG', s, True)
    print(dataset)
    print('\n\t*** result is also availabe in results folder ***')
    # to_csv does not create missing directories; make sure 'results' exists
    # so the export does not end in FileNotFoundError.
    os.makedirs('results', exist_ok=True)
    dataset.to_csv('results/result.csv')


enter the choice 
0) predicting for more.
1) predicting for a single.

   0
enter the file name for predicting:predictData.csv


training model:   0%|           [time left: 03:49 ]



building phase1 model:



training model: 100%|██████████ [time left: 00:00 ]
testing model : 100%|██████████ [time left: 00:00 ]


scores: [90.74250811223516, 91.1910669975186]
mean Acuracy: 90.967%
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
enter c to continue or enter x to exit
	x
   Course_code  Student_id final_results            SUGG
0     10A70501  10341A0582          pass  no suggestions
1     10A70501  11341A0573          pass  no suggestions
2     10A70501  11341A0574          pass  no suggestions
3     10A70501  11341A0576          pass  no suggestions
4     10A70501  11341A0577          pass  no suggestions
5     10A70501  11341A0578          pass  no suggestions
6     10A70501  11341A0579          pass  no suggestions
7     10A70501  11341A0580          pass  no suggestions
8     10A70501  11341A0581          pass  no suggestions
9     10A70501  11341A0582          pass  no suggestions
10    10A70501  11341A0583          pass  no suggestions
11    10A70501  11341A0584          pass  no suggestions
12    10A70501  11341A0585          pass  no suggestions
13    10A70501  11341A0586          pass  n

FileNotFoundError: ignored