### Loading Necessary Libraries

In [None]:
import re
import string
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, learning_curve
from sklearn.linear_model import LogisticRegression
from sklearn.multioutput import MultiOutputClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import f1_score
from sklearn.model_selection import ShuffleSplit
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer

from tensorflow.keras.preprocessing import image

from PIL import ImageFile, ImageOps
# Tell Pillow to decode image files that are cut short instead of raising
# an error while loading them.
ImageFile.LOAD_TRUNCATED_IMAGES = True

### Reading Image Info from CSV and Cleaning

In [None]:
# Load the label table, then discard every column whose name contains
# "unnamed" (case-insensitive) as well as the raw OCR text column.
df = pd.read_csv('../input/memotion-dataset-7k/memotion_dataset_7k/labels.csv')
unnamed_columns = df.columns[df.columns.str.contains('unnamed', case=False)]
df = df.drop(columns=unnamed_columns).drop(columns=['text_ocr'])
df.head()

In [None]:
# Show every row that has a missing value in at least one column.
df.loc[df.isna().any(axis=1)]

In [None]:
# Work on an independent, NaN-free copy so the raw `df` stays untouched.
cleaned = df.dropna().copy()
# Per-column check for remaining missing values.
cleaned.isna().any()

In [None]:
def get_image(dataframe):
    """Build one dense feature matrix from meme images and their captions.

    Every image named in ``dataframe['image_name']`` is loaded from the
    Memotion image directory, resized to 100x100, converted to grayscale,
    divided by 255 and flattened to 10,000 values.  The captions in
    ``dataframe['text_corrected']`` are converted to TF-IDF vectors and
    appended column-wise to the pixel values.

    Rows containing missing values are removed from ``dataframe`` in place
    before anything is loaded, so after the call the frame holds exactly one
    row per row of the returned matrix, in the same order.

    Args:
        dataframe: pandas DataFrame with at least the ``image_name`` and
            ``text_corrected`` columns.  Modified in place (NaN rows dropped).

    Returns:
        2-D numpy array with one row per remaining row of ``dataframe`` and
        ``100 * 100 + vocabulary_size`` columns.
    """
    # In-place on purpose: callers reuse the same frame to build the targets,
    # so it has to lose the same rows as the feature matrix.
    dataframe.dropna(inplace=True)

    width = 100
    height = 100
    image_dir = '../input/memotion-dataset-7k/memotion_dataset_7k/images/'

    # Walk the surviving rows themselves rather than `range(n)` + label lookup:
    # dropna() leaves gaps in the index labels, so positional counters neither
    # match the labels nor reach the last rows.  Iterating the values removes
    # the need for hard-coded lists of indices / file names to skip.
    X = []
    for image_name in tqdm(dataframe['image_name'].values):
        img = image.load_img(image_dir + image_name, target_size=(height, width))
        img = ImageOps.grayscale(img)
        img = image.img_to_array(img)
        X.append(img / 255.0)

    # One flat row of width*height grayscale intensities per image.
    X = np.array(X).reshape(len(X), width * height)

    # Bag-of-words counts re-weighted with TF-IDF, densified so they can be
    # stacked next to the pixel features.
    text_data = CountVectorizer().fit_transform(dataframe['text_corrected'].values)
    text_data = TfidfTransformer().fit_transform(text_data).toarray()

    features = np.hstack((X, text_data))

    return features

In [None]:
# Build the combined image + text feature matrix.  get_image drops rows from
# the frame it receives in place, so `cleaned` itself must be reused when the
# targets are created to keep features and labels aligned.
X = get_image(cleaned)

In [None]:
# Display the dimensions of the feature matrix.
X.shape

In [None]:
def create_target(dataframe):
    """Derive the three label sets used by the experiments.

    Args:
        dataframe: pandas DataFrame with the columns ``overall_sentiment``,
            ``humour``, ``sarcasm``, ``offensive`` and ``motivational``.
            It is not modified.

    Returns:
        A tuple ``(target_A, target_B, target_C)`` of DataFrames that share
        the index of ``dataframe``:

        * ``target_A`` - one-hot encoding of ``overall_sentiment``.
        * ``target_B`` - one 0/1 column each for humour, sarcasm, offensive
          and motivational (0 for the "not_..." label, 1 for every other
          known label).  A label missing from the mapping becomes NaN.
        * ``target_C`` - one-hot encodings of the fine-grained sarcasm,
          humour, offensive and motivational labels, concatenated in that
          order.
    """
    binary_maps = {
        'humour': {'not_funny': 0, 'funny': 1, 'very_funny': 1, 'hilarious': 1},
        'sarcasm': {'not_sarcastic': 0, 'general': 1, 'twisted_meaning': 1, 'very_twisted': 1},
        'offensive': {'not_offensive': 0, 'slight': 1, 'very_offensive': 1, 'hateful_offensive': 1},
        'motivational': {'not_motivational': 0, 'motivational': 1},
    }

    target_A = pd.get_dummies(dataframe['overall_sentiment'])

    # Columns are picked by name, so the result does not depend on where the
    # label columns happen to sit in the frame.
    target_B = pd.DataFrame(
        {column: dataframe[column].map(mapping) for column, mapping in binary_maps.items()},
        index=dataframe.index,
    )

    # Built from the argument (not a module-level frame) and covering all four
    # attributes once each, so the rows always match target_A / target_B.
    fine_grained = ('sarcasm', 'humour', 'offensive', 'motivational')
    target_C = pd.concat(
        [pd.get_dummies(dataframe[column]) for column in fine_grained],
        axis=1,
    )

    return target_A, target_B, target_C

In [None]:
# Derive the three label sets from the same frame that produced X:
# target_A = one-hot overall sentiment, target_B = binary attribute flags,
# target_C = one-hot fine-grained attribute labels.
target_A, target_B, target_C = create_target(cleaned)

In [None]:
from sklearn.metrics import f1_score, multilabel_confusion_matrix

### Logistic Regression

In [None]:
# Task A (overall sentiment): hold out 20% of the samples, stratified on the
# label rows, and fit one logistic regression per output column.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    target_A.values,
    test_size=0.2,
    stratify=target_A,
)

clasifier_A = MultiOutputClassifier(LogisticRegression(max_iter=10000))
clasifier_A.fit(X_train, y_train)

prediction = clasifier_A.predict(X_test)

# Report micro- then macro-averaged F1 on the held-out split.
for average in ('micro', 'macro'):
    print(f1_score(y_test, prediction, average=average))

In [None]:
# Task B (binary attribute flags): 80/20 stratified split, one logistic
# regression per output column.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    target_B.values,
    test_size=0.2,
    stratify=target_B,
)

clasifier_B = MultiOutputClassifier(LogisticRegression(max_iter=10000))
clasifier_B.fit(X_train, y_train)
prediction = clasifier_B.predict(X_test)

# Report micro- then macro-averaged F1 on the held-out split.
for average in ('micro', 'macro'):
    print(f1_score(y_test, prediction, average=average))

In [None]:
# Task C (one-hot fine-grained labels): 80/20 stratified split, one logistic
# regression per output column.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    target_C.values,
    test_size=0.2,
    stratify=target_C,
)

clasifier_C = MultiOutputClassifier(LogisticRegression(max_iter=10000))
clasifier_C.fit(X_train, y_train)
prediction = clasifier_C.predict(X_test)

# Report micro- then macro-averaged F1 on the held-out split.
for average in ('micro', 'macro'):
    print(f1_score(y_test, prediction, average=average))

### Random Forest

In [None]:
# Task A (overall sentiment) with a random forest per output column,
# evaluated on a fresh 80/20 stratified split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    target_A.values,
    test_size=0.2,
    stratify=target_A,
)

clasifier_A = MultiOutputClassifier(RandomForestClassifier())
clasifier_A.fit(X_train, y_train)
prediction = clasifier_A.predict(X_test)

# Report micro- then macro-averaged F1 on the held-out split.
for average in ('micro', 'macro'):
    print(f1_score(y_test, prediction, average=average))

In [None]:
# Task B (binary attribute flags) with a random forest per output column,
# evaluated on a fresh 80/20 stratified split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    target_B.values,
    test_size=0.2,
    stratify=target_B,
)

clasifier_B = MultiOutputClassifier(RandomForestClassifier())
clasifier_B.fit(X_train, y_train)
prediction = clasifier_B.predict(X_test)

# Report micro- then macro-averaged F1 on the held-out split.
for average in ('micro', 'macro'):
    print(f1_score(y_test, prediction, average=average))

In [None]:
# Task C (one-hot fine-grained labels) with a random forest per output
# column, evaluated on a fresh 80/20 stratified split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    target_C.values,
    test_size=0.2,
    stratify=target_C,
)

clasifier_C = MultiOutputClassifier(RandomForestClassifier())
clasifier_C.fit(X_train, y_train)
prediction = clasifier_C.predict(X_test)

# Report micro- then macro-averaged F1 on the held-out split.
for average in ('micro', 'macro'):
    print(f1_score(y_test, prediction, average=average))

### Decision Tree 

In [None]:
# Task A (overall sentiment) with a decision tree per output column,
# evaluated on a fresh 80/20 stratified split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    target_A.values,
    test_size=0.2,
    stratify=target_A,
)

clasifier_A = MultiOutputClassifier(DecisionTreeClassifier())
clasifier_A.fit(X_train, y_train)
prediction = clasifier_A.predict(X_test)

# Report micro- then macro-averaged F1 on the held-out split.
for average in ('micro', 'macro'):
    print(f1_score(y_test, prediction, average=average))

In [None]:
# Task B (binary attribute flags) with a decision tree per output column,
# evaluated on a fresh 80/20 stratified split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    target_B.values,
    test_size=0.2,
    stratify=target_B,
)

clasifier_B = MultiOutputClassifier(DecisionTreeClassifier())
clasifier_B.fit(X_train, y_train)
prediction = clasifier_B.predict(X_test)

# Report micro- then macro-averaged F1 on the held-out split.
for average in ('micro', 'macro'):
    print(f1_score(y_test, prediction, average=average))

In [None]:
# Task C (one-hot fine-grained labels) with a decision tree per output
# column, evaluated on a fresh 80/20 stratified split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    target_C.values,
    test_size=0.2,
    stratify=target_C,
)

clasifier_C = MultiOutputClassifier(DecisionTreeClassifier())
clasifier_C.fit(X_train, y_train)
prediction = clasifier_C.predict(X_test)

# Report micro- then macro-averaged F1 on the held-out split.
for average in ('micro', 'macro'):
    print(f1_score(y_test, prediction, average=average))