In [2]:
from keras.models import Model
from keras.layers import Input, Dense, Concatenate
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.preprocessing import OneHotEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from scipy.sparse import hstack
import numpy as np
import pandas as pd

df = pd.read_csv("train.csv")
df['author'].fillna('Unknown', inplace=True)
df['title'].fillna('Ambiguous', inplace=True)
df['text'].fillna('Ambiguous', inplace=True)
df.drop_duplicates(inplace=True)

# TF-IDF Vectorization for text and title
vectorizer_text = TfidfVectorizer(max_features=5000)
X_text = vectorizer_text.fit_transform(df['text']).toarray()

vectorizer_title = TfidfVectorizer(max_features=1000)
X_title = vectorizer_title.fit_transform(df['title']).toarray()

# One-hot encoding for authors
encoder = OneHotEncoder()
X_author = encoder.fit_transform(df[['author']]).toarray()

# Train-test split
y = df['label'].values
X_train_text, X_test_text, X_train_title, X_test_title, X_train_author, X_test_author, y_train, y_test = train_test_split(
    X_text, X_title, X_author, y, test_size=0.2, random_state=42)

# Neural Network Architecture
input_text = Input(shape=(X_train_text.shape[1],))
input_title = Input(shape=(X_train_title.shape[1],))
input_author = Input(shape=(X_train_author.shape[1],))

# Layers for text
x1 = Dense(128, activation='relu')(input_text)
x1 = Dense(64, activation='relu')(x1)

# Layers for title
x2 = Dense(128, activation='relu')(input_title)
x2 = Dense(64, activation='relu')(x2)

# Layers for author
x3 = Dense(128, activation='relu')(input_author)
x3 = Dense(64, activation='relu')(x3)

# Concatenate
concat = Concatenate()([x1, x2, x3])

# Final layers
out = Dense(64, activation='relu')(concat)
out = Dense(1, activation='sigmoid')(out)

# Compile model
model = Model(inputs=[input_text, input_title, input_author], outputs=out)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train model
model.fit([X_train_text, X_train_title, X_train_author], y_train, epochs=10, batch_size=32)

# Evaluate model
score = model.evaluate([X_test_text, X_test_title, X_test_author], y_test)
print(f"Test Accuracy: {score[1]}")

y_pred = model.predict([X_test_text, X_test_title, X_test_author])
y_pred = np.round(y_pred).flatten()  # Round the probabilities to get binary class labels

# Print classification report
print(classification_report(y_test, y_pred))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Accuracy: 0.9978365302085876
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      2132
           1       1.00      1.00      1.00      2028

    accuracy                           1.00      4160
   macro avg       1.00      1.00      1.00      4160
weighted avg       1.00      1.00      1.00      4160



In [14]:
# t_X_text = vectorizer_text.fit_transform(['''In an anti-U.S. speech, Putin said his most powerful missiles, "Satan-2" and "Flying Chernobyl," are ready to be launched in an ominous warning and show of aggression towards the West.
# The Russian dictator told a conference in Sochi, "From the moment the launch of missiles is detected, no matter where it comes from — from any point of the world ocean or from any territory — such a number, so many hundreds of our missiles appear in the air in a retaliatory strike that there is no chance of survival there will be no single enemy left, and in several directions at once."
# Putin urged the U.S. to understand that any threats against Russia are "absolutely unacceptable for any potential aggressor."
# He also mentioned the possibility of Russia withdrawing from the nuclear test ban treaty, which could lead to the country conducting major weapon tests, possibly in the Arctic.''']).toarray()

# t_X_title = vectorizer_title.fit_transform([''''No Chance of Survival': Putin Warns West of Nuclear Destruction, Threatens Use of Russia's Most Powerful Missiles in Retaliation''']).toarray()

# t_X_author = encoder.fit_transform([['unknow']])

# # print(vectorizer_text.fit_transform([(df['text'])[0]]).toarray())
# # t_X_title = vectorizer_title.fit_transform(df['title']).toarray()
# # t_X_author = encoder.fit_transform(df[['author']]).toarray()

# print(t_X_text)
# print(t_X_title)
# print(t_X_author)

# y_pred = model.predict([t_X_text, t_X_title, t_X_author])

y_pred = model.predict([X_test_text[0], X_test_title[0], X_test_author[0]])


