## Loading Libraries

In [1]:
library(e1071) # for SVM classifier
library(dplyr) # for data manipulation


Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union



## Reading Datasets

In [2]:
# Load the unigram and bigram tables for the training documents.
docs_train_unigrams <- read.csv(file = "docs_train_unigrams.csv")
docs_train_bigrams <- read.csv(file = "docs_train_bigrams.csv")

# Load the corresponding unigram and bigram tables for the test documents.
docs_test_unigrams <- read.csv(file = "docs_test_unigrams.csv")
docs_test_bigrams <- read.csv(file = "docs_test_bigrams.csv")

## Merging Datasets

In [3]:
# Join the unigram and bigram tables on the two columns they share
# (Doc_id and Class_Label), once for the training data and once for the test data.
docs_train <- merge(
  x = docs_train_unigrams,
  y = docs_train_bigrams,
  by = c("Doc_id", "Class_Label")
)
docs_test <- merge(
  x = docs_test_unigrams,
  y = docs_test_bigrams,
  by = c("Doc_id", "Class_Label")
)

# Drop Class_Label from the test data: per the original author's note it only
# holds NAs there.
docs_test <- docs_test[, names(docs_test) != "Class_Label", drop = FALSE]

## Scaling(Normalizing data) for features

In [4]:
# Standardise every feature column (everything except Doc_id and Class_Label).
# The centring/scaling statistics are estimated on the training data only and
# then re-used for the test data, so both sets are expressed in the same
# feature space. Scaling the test set with its own mean/sd would shift it
# relative to the data the classifier is fitted on, and would turn any column
# that happens to be constant in the test set into NaN.
feature_cols <- names(docs_train)[
  !names(docs_train) %in% c("Doc_id", "Class_Label")
]
# Fail early if the test table is missing a feature the model is trained on.
stopifnot(all(feature_cols %in% names(docs_test)))

train_features <- as.matrix(docs_train[feature_cols])
feature_center <- colMeans(train_features, na.rm = TRUE)
feature_sd <- apply(train_features, 2, sd, na.rm = TRUE)
# A constant column has sd 0 (0/0 = NaN after division); keep it centred but
# unscaled instead.
feature_sd[is.na(feature_sd) | feature_sd == 0] <- 1

# scaling train data
docs_train[feature_cols] <- scale(
  train_features,
  center = feature_center,
  scale = feature_sd
)

# scaling test data with the statistics learned from the training data
docs_test[feature_cols] <- scale(
  as.matrix(docs_test[feature_cols]),
  center = feature_center,
  scale = feature_sd
)

In [5]:
# Show (number of rows, number of columns) for the train and test tables.
c(nrow(docs_train), ncol(docs_train))
c(nrow(docs_test), ncol(docs_test))

## Splitting train data into sample train and test data sets

In [6]:
# Randomly pick 80% of the row indices of docs_train (without replacement).
# The picked rows become the fitting split; all remaining rows form the
# hold-out split used for evaluation.
sample_ind <- sample(
  x = nrow(docs_train),
  size = 0.8 * nrow(docs_train),
  replace = FALSE
)
sample_train <- docs_train[sample_ind, ]
sample_test <- docs_train[-sample_ind, ]

In [7]:
# Show (number of rows, number of columns) for the fitting and hold-out splits.
c(nrow(sample_train), ncol(sample_train))
c(nrow(sample_test), ncol(sample_test))

In [8]:
# Encode the class labels of the fitting split as a factor.
sample_train$Class_Label <- factor(sample_train$Class_Label)

## SVM Classifier for the split train and test data

### SVM Classifier

In [None]:
# Fit a radial-kernel SVM (cost = 2) on the fitting split. Class_Label is the
# response; every other column except Doc_id is used as a predictor.
svm.fit <- svm(
  Class_Label ~ . - Doc_id,
  data = sample_train,
  kernel = "radial",
  cost = 2
)

### Prediction of labels for both train and test data 

In [None]:
# Predict labels for the rows the model was fitted on and store them next to
# the true labels.
sample_train[["predicted_label"]] <- predict(svm.fit, newdata = sample_train)

In [None]:
# Predict labels for the hold-out rows and store them next to the true labels.
sample_test[["predicted_label"]] <- predict(svm.fit, newdata = sample_test)

### Confusion matrices for train and test data

In [None]:
# Cross-tabulate true labels (rows) against predicted labels (columns) for the
# fitting split.
conf_train <- table(
  sample_train[["Class_Label"]],
  sample_train[["predicted_label"]]
)

In [None]:
# Confusion Matrix for test data (rows = true labels, columns = predictions).
# The true labels are re-encoded with the factor levels of the predictions so
# the table is always square with rows and columns in the same class order.
# Without this, a class that happens to be absent from the hold-out rows drops
# a row, and diag() / the row and column totals no longer line up per class.
conf_test <- table(
  factor(
    sample_test$Class_Label,
    levels = levels(sample_test$predicted_label)
  ),
  sample_test$predicted_label
)

### Train and Test Accuracy

In [None]:
# Accuracy on the fitting split: correctly classified rows (the diagonal of the
# confusion matrix) divided by the number of rows.
diag_train <- diag(conf_train)
N_train <- nrow(sample_train)
train_accuracy <- sum(diag_train) / N_train
round(100 * train_accuracy, digits = 2) # train accuracy, in percent

In [None]:
# Accuracy on the hold-out split: correctly classified rows (the diagonal of
# the confusion matrix) divided by the number of rows.
diag_test <- diag(conf_test)
N_test <- nrow(sample_test)
test_accuracy <- sum(diag_test) / N_test
round(100 * test_accuracy, digits = 2) # test accuracy, in percent

### Precision and Recall for test data

In [None]:
# Marginal totals of the hold-out confusion matrix:
# rowsums = count per true class, colsums = count per predicted class.
rowsums <- apply(X = conf_test, MARGIN = 1, FUN = sum)
colsums <- apply(X = conf_test, MARGIN = 2, FUN = sum)

In [None]:
# Precision for test data: per class, the number of correct predictions
# (diag_test) divided by how often that class was predicted (colsums).
# The numerator must be diag_test; a bare `diag` is the base R function and
# dividing it raises "non-numeric argument to binary operator".
Precision <- diag_test / colsums
# A class that was never predicted gives 0/0 = NaN; count it as precision 0 so
# the macro average below stays defined.
Precision[!is.finite(Precision)] <- 0
Precision <- round(Precision, 2)
round(mean(Precision), 2) # macro-averaged precision

In [None]:
# Recall for test data: per class, the number of correct predictions
# (diag_test) divided by how many rows truly belong to that class (rowsums).
# The numerator must be diag_test; a bare `diag` is the base R function and
# dividing it raises "non-numeric argument to binary operator".
Recall <- diag_test / rowsums
# A class with no true rows in the hold-out split gives 0/0 = NaN; count it as
# recall 0 so the macro average below stays defined.
Recall[!is.finite(Recall)] <- 0
Recall <- round(Recall, 2)
round(mean(Recall), 2) # macro-averaged recall

### F-Score

In [None]:
# Per-class F-score: harmonic mean of precision and recall.
f.score <- (2 * Precision * Recall) / (Precision + Recall)
# When precision and recall are both zero (or undefined) the ratio is
# 0/0 = NaN, which would make the whole average NaN; such classes score 0.
f.score[!is.finite(f.score)] <- 0
round(mean(f.score), 2) # macro-averaged F-score

## Building SVM Classifier on whole data and Prediction of test labels

In [None]:
# building the classifier on whole data
# Only sample_train had its labels converted to a factor earlier; docs_train
# still holds the raw column. e1071::svm chooses classification vs. regression
# from whether the response is a factor, so encode the labels here to make sure
# the final model is a classifier like the one that was evaluated.
docs_train[["Class_Label"]] <- factor(docs_train[["Class_Label"]])
svm.fit.final <- svm(
  Class_Label ~ . - Doc_id,
  data = docs_train,
  kernel = "radial",
  cost = 2
)

In [None]:
# Use the model fitted on all training data to label the test documents.
docs_test[["predicted_label"]] <- predict(svm.fit.final, newdata = docs_test)

In [None]:
# Preview the first six rows of the test table, now including predicted_label.
head(x = docs_test, n = 6L)

## Writing test labels to csv

In [None]:
# Keep only the document id and its predicted label for the output file.
docs_test_labels <- docs_test[, c("Doc_id", "predicted_label")]

In [None]:
# Preview the first six rows of the id/label table that will be written out.
head(x = docs_test_labels, n = 6L)

In [None]:
# Write one "Doc_id predicted_label" pair per line: space-separated, with no
# header row, no row names and no quoting.
write.table(
  x = docs_test_labels,
  file = "testing_labels_pred.txt",
  sep = " ",
  quote = FALSE,
  row.names = FALSE,
  col.names = FALSE
)