In [5]:
from matplotlib import pyplot
from keras.datasets import cifar10
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Dense
from keras.layers import Flatten
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Dropout
from keras.layers import BatchNormalization
import tensorflow as tf
from tensorflow.python.keras.applications.resnet import ResNet50


def get_dataset():
    (trainX, y_train), (testX, y_test) = cifar10.load_data()
    y_train = tf.keras.utils.to_categorical(y_train)
    y_test = tf.keras.utils.to_categorical(y_test)
    return trainX, y_train, testX, y_test


def preprocess_data(train_data, test_data):
    train_normalize = train_data.astype('float32')
    test_normalize = test_data.astype('float32')
    train_normalize = train_normalize / 255.0
    test_normalize = test_normalize / 255.0
    return train_normalize, test_normalize


def get_base_model():
    model = Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=(32, 32, 3)))
    model.add(BatchNormalization())
    model.add(Conv2D(32, (3, 3), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D((2, 2)))
    model.add(Dropout(0.2))
    model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D((2, 2)))
    model.add(Dropout(0.3))
    model.add(Conv2D(128, (3, 3), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(Conv2D(128, (3, 3), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D((2, 2)))
    model.add(Dropout(0.4))
    model.add(Flatten())
    model.add(Dense(128, activation='relu', kernel_initializer='he_uniform'))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))
    model.add(Dense(10, activation='softmax'))
    optimizer_ = tf.keras.optimizers.SGD(lr=0.001, momentum=0.9)
    model.compile(optimizer=optimizer_, loss='categorical_crossentropy', metrics=['accuracy'])
    return model


def plot_history(history):
    pyplot.subplot(211)
    pyplot.title('Loss')
    pyplot.plot(history.history['loss'], color='red', label='Train')
    pyplot.plot(history.history['val_loss'], color='green', label='Test')
    pyplot.subplot(212)
    pyplot.title('Accuracy')
    pyplot.plot(history.history['accuracy'], color='red', label='Train')
    pyplot.plot(history.history['val_accuracy'], color='green', label='Test')
    pyplot.show()


def transfer_learning_model():
    base_model_2 = ResNet50(include_top=False, weights='imagenet', input_shape=(32, 32, 3), classes=10)
    model_2 = Sequential()
    model_2.add(base_model_2)
    model_2.add(Flatten())

    model_2.add(Dense(4000, activation=('relu'), input_dim=512))
    model_2.add(Dense(2000, activation=('relu')))
    model_2.add(Dropout(.4))
    model_2.add(Dense(1000, activation=('relu')))
    model_2.add(Dropout(.3))
    model_2.add(Dense(500, activation=('relu')))
    model_2.add(Dropout(.2))
    model_2.add(Dense(10, activation=('softmax')))
    optimizer_ = tf.keras.optimizers.SGD()
    model_2.compile(optimizer=optimizer_, loss='categorical_crossentropy', metrics=['accuracy'])
    return model_2


def run_task1():
    X_train, y_train, X_test, y_test = get_dataset()
    X_train, X_test = preprocess_data(X_train, X_test)

    model = get_base_model()
    manual_datagen = ImageDataGenerator(width_shift_range=0.1, height_shift_range=0.1, horizontal_flip=True)

    train_dataset = manual_datagen.flow(X_train, y_train, batch_size=64)
    steps = int(X_train.shape[0] / 64)

    history = model.fit(train_dataset, steps_per_epoch=steps, epochs=2, validation_data=(X_test, y_test), verbose=1)

    _, acc = model.evaluate(X_test, y_test, verbose=0)
    print(f'Accuracy is {round(float(acc)*100, 3)}')
    plot_history(history)

    print('Starting ResNet 50 ')
    model_2 = transfer_learning_model()
    history = model_2.fit(train_dataset, steps_per_epoch=steps, epochs=2, validation_data=(X_test, y_test), verbose=1)
    print(f'Accuracy is {round(float(acc)*100, 3)}')
    plot_history(history)

run_task1()

2022-03-24 12:35:18.886561: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-03-24 12:35:18.886633: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [8]:
import pandas as pd
import string
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier

stop_words = stopwords.words('english')
porter = PorterStemmer()

def processing_string(row, col):
    word = row[col]
    word = str(word).lower()
    text_p = "".join([char for char in word if char not in string.punctuation])
    filtered_words = [word for word in [text_p] if word not in stop_words]
    stemmed = [porter.stem(word) for word in filtered_words]
    if isinstance(stemmed, list):
        row[col] = ''.join(stemmed)
    else:
        row[col] = stemmed
    return row


df = pd.read_csv('agr_en_train.csv', header=None)
df.columns = ['sent1', 'sent2', 'label']

df = df.apply(lambda x: processing_string(x, 'sent2'), axis=1)
df = df.drop('sent1', axis=1)

tfidf = TfidfVectorizer(min_df=5, max_df=0.8)
yy = tfidf.fit_transform(df['sent2'])

df_processd = pd.DataFrame(yy.toarray(), columns=tfidf.get_feature_names())
X_train, X_test, y_train, y_test = train_test_split(df_processd, df['label'], test_size=0.2)

def build_randomforest_model(X_train, y_train, X_test, y_test):
    print("Started Random Forest Classifier ...")
    model = RandomForestClassifier(n_jobs=-1)
    model.fit(X_train, y_train)
    print("Completed!")
    print('Training Accuracy Score', model.score(X_train, y_train))
    print('*' * 40)
    return model

random_forest = build_randomforest_model(X_train, y_train, X_test, y_test)

Started Random Forest Classifier ...
Completed!
Training Accuracy Score 0.9896864256693405
****************************************
