In [125]:
import pandas as pd
import numpy as np

In [126]:
df=pd.read_csv('/content/finally.csv')

In [127]:
df.head()

Unnamed: 0,likes,hour,word_count,char_count,polarity,subjectivity,org_mentions,place_mentions,date_mentions,time_mentions,...,has_photo,has_video,has_gif,day,weekday,month,year,is_weekend,is_morning,is_evening
0,1,0,29,181,0.175,0.325,0,0,1,1,...,1,0,0,12,5,12,2020,1,0,0
1,2750,10,10,73,0.0,0.0,0,0,0,0,...,1,0,0,30,5,6,2018,1,1,0
2,57,19,14,104,-0.1,0.1,0,0,0,0,...,1,0,0,29,1,9,2020,0,0,1
3,152,11,22,140,0.5,0.9,0,1,1,0,...,1,0,0,1,3,10,2020,0,1,0
4,41,14,26,199,0.0625,0.083333,0,0,1,0,...,1,0,0,19,4,10,2018,0,0,0


In [128]:
import re
import ast

def fix_embedding_string(s):
    # Remove any newline characters and extra spaces
    s = s.replace('\n', ' ').strip()

    # Insert commas between float numbers using regex
    # This inserts commas between two numbers that are separated by whitespace
    s = re.sub(r'(?<=\d)\s+(?=[\-\d])', ', ', s)

    return ast.literal_eval(s)  # Convert the string to an actual Python list


In [129]:
df['bert_embedding'] = df['bert_embedding'].apply(fix_embedding_string)


In [130]:
bert_df = pd.DataFrame(df['bert_embedding'].tolist(), index=df.index)
bert_df.columns = [f'bert_{i}' for i in range(bert_df.shape[1])]

# Add the new bert columns to df
df = df.join(bert_df)

# Drop the original bert_embedding column
df = df.drop(columns=['bert_embedding'])

print(df.shape)

(17331, 815)


In [131]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
import tensorflow as tf

# Log-Cosh loss
def log_cosh_loss(y_true, y_pred):
    return tf.reduce_mean(tf.math.log(tf.cosh(y_pred - y_true)))

# Prepare data
y = np.log1p(df['likes'])              # log1p target
X = df.drop('likes', axis=1)           # features

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)

# Model
model = Sequential([
    Dense(512, activation='relu', input_shape=(X_train.shape[1],)),
    BatchNormalization(),
    Dropout(0.3),
    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(1)
])

# Compile
model.compile(optimizer=Adam(learning_rate=0.001), loss=log_cosh_loss)

# Callbacks
lr_scheduler = ReduceLROnPlateau(
    monitor='val_loss', factor=0.5, patience=3,
    verbose=1, min_lr=1e-6
)
early_stop = EarlyStopping(
    monitor='val_loss', patience=6,
    restore_best_weights=True, verbose=1
)

# Train
model.fit(
    X_train, y_train,
    validation_split=0.1,
    epochs=100,
    batch_size=32,
    callbacks=[lr_scheduler, early_stop],
    verbose=1
)

# Predict
y_pred_log = model.predict(X_test).flatten()
y_pred = np.expm1(y_pred_log)
y_true = np.expm1(y_test)

# RMSE
rmse = np.sqrt(mean_squared_error(y_true, y_pred))
print(f"Optimized Neural Net RMSE: {rmse:.2f}")


Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m390/390[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 13ms/step - loss: 1.6543 - val_loss: 0.8976 - learning_rate: 0.0010
Epoch 2/100
[1m390/390[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 11ms/step - loss: 0.9607 - val_loss: 0.8415 - learning_rate: 0.0010
Epoch 3/100
[1m390/390[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 15ms/step - loss: 0.8432 - val_loss: 0.7475 - learning_rate: 0.0010
Epoch 4/100
[1m390/390[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 12ms/step - loss: 0.7549 - val_loss: 0.7275 - learning_rate: 0.0010
Epoch 5/100
[1m390/390[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 14ms/step - loss: 0.7075 - val_loss: 0.7266 - learning_rate: 0.0010
Epoch 6/100
[1m390/390[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 14ms/step - loss: 0.6544 - val_loss: 0.6680 - learning_rate: 0.0010
Epoch 7/100
[1m390/390[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 11ms/step - loss: 0.6291 - val_loss: 0.6845 - learning_ra

In [142]:
row = df.iloc[[999]].drop('likes', axis=1)

row_scaled = scaler.transform(row)
pred_log = model.predict(row_scaled).flatten()[0]

pred_likes = np.expm1(pred_log)

print(f"Predicted likes for row 3: {pred_likes:.2f}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
Predicted likes for row 3: 89.41


In [143]:
model.save("op.keras")
from google.colab import files
files.download("op.keras")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>