In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
# List every file available under the Kaggle input mount, one full path per line.
input_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

In [None]:
import pandas as pd
# Load the raw daily crypto tracker CSV and preview it.
df = pd.read_csv("/kaggle/input/daily-crypto-tracker-dataset/daily_crypto_tracker.csv")
print(df.head())
# DataFrame.info() writes its summary to stdout itself and returns None, so
# wrapping it in print() only appended a stray "None" line to the output.
df.info()

In [None]:
# Build the modelling table: feature frame `X` and binary target `y`, where
# is_gain == 1 means "this coin's price is higher than at its previous snapshot".
DROPPED_COLUMNS = ['price_change_percentage_24h', 'fetch_date', 'id', 'symbol']

# Same row filter as before: remove the unused columns first, then discard any
# row that still has a missing value.
features = df.drop(columns=DROPPED_COLUMNS).dropna()

# The label must compare a coin with *its own* earlier price. The previous
# version used shift(1) over the whole table after `id`/`fetch_date` had been
# dropped, so each row was compared with whatever row happened to precede it
# (usually a different coin). Keep the coin id and date just long enough to
# compute the per-coin difference.
# NOTE(review): assumes `fetch_date` is parseable by pd.to_datetime — TODO confirm.
snapshots = df.loc[features.index, ['id', 'fetch_date', 'current_price']]
snapshots = snapshots.sort_values('fetch_date', key=pd.to_datetime, kind='stable')

price_delta = snapshots.groupby('id')['current_price'].diff()
# A coin's first snapshot has no predecessor (NaN delta) and is labelled 0,
# matching how the first row was treated before.
is_gain = price_delta.gt(0).astype(int)

# Rows are kept in chronological order, so an unshuffled train/test split
# downstream becomes a split by time.
df_clean = features.loc[snapshots.index].copy()
df_clean['is_gain'] = is_gain.reindex(df_clean.index, fill_value=0)

# NOTE(review): `current_price` stays in X although the label is derived from
# it — confirm this is acceptable for the intended prediction task.
X = df_clean.drop(columns=['is_gain'])
y = df_clean['is_gain']
print("✅ Features ready:", X.shape)
print("🎯 Target ready:", y.shape)

In [None]:
# Display the (rows, columns) shape of the raw frame as loaded from the CSV;
# `df` itself is not modified by the cleaning cell, which works on `df_clean`.
df.shape

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing. With shuffle=False the rows keep their
# existing order, so the test set is the tail of the table rather than a
# random sample.
split_options = dict(test_size=0.2, shuffle=False)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

print("📂 Train size:", X_train.shape)
print("📂 Test size:", X_test.shape)

In [None]:
#1_Random Forest
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# One-hot encode the train and test feature frames independently.
train_dummies = pd.get_dummies(X_train)
test_dummies = pd.get_dummies(X_test)

# Give the test frame the training column layout: join='left' keeps exactly the
# training columns, and any of them absent from test are filled with 0.
X_train_encoded, X_test_encoded = train_dummies.align(
    test_dummies, join='left', axis=1, fill_value=0
)

# Fit the forest with a fixed random_state, then score the held-out rows.
rf_model = RandomForestClassifier(random_state=42).fit(X_train_encoded, y_train)
rf_preds = rf_model.predict(X_test_encoded)

rf_cm = confusion_matrix(y_test, rf_preds)
accuracy = accuracy_score(y_test, rf_preds)

# Per-class precision / recall / F1.
print("📊 Random Forest Report:")
print(classification_report(y_test, rf_preds))

# Confusion matrix as an annotated heatmap with integer cell labels.
sns.heatmap(rf_cm, annot=True, fmt='d', cmap='Blues')
plt.title("Random Forest Confusion Matrix")
plt.show()

print("Accuracy:", accuracy)

In [None]:
#2_Logistic Regression
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Scaling helpers are imported here because this cell's import header does not
# bring them in.
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# One-hot encode categorical features in the train and test frames.
X_train_encoded = pd.get_dummies(X_train)
X_test_encoded = pd.get_dummies(X_test)

# Give the test frame the training column layout (missing columns become 0).
X_train_encoded, X_test_encoded = X_train_encoded.align(X_test_encoded, join='left', axis=1, fill_value=0)

# Standardise the inputs before the linear model. The previous version fitted
# LogisticRegression on the raw encoded columns, whereas the LSTM cell of this
# notebook standardises the same features; a gradient-based solver is sensitive
# to unscaled inputs. The pipeline fits the scaler on the training rows only
# and reuses those statistics at predict time.
# `log_model` is now a Pipeline; the fitted classifier itself is `log_model[-1]`.
log_model = make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000))
log_model.fit(X_train_encoded, y_train)

log_preds = log_model.predict(X_test_encoded)

# Per-class precision / recall / F1.
print("📊 Logistic Regression Report:")
print(classification_report(y_test, log_preds))

# Confusion matrix as an annotated heatmap with integer cell labels.
log_cm = confusion_matrix(y_test, log_preds)
sns.heatmap(log_cm, annot=True, fmt='d', cmap='Purples')
plt.title("Logistic Regression Confusion Matrix")
plt.show()

accuracy = accuracy_score(y_test, log_preds)
print("Accuracy:", accuracy)

In [None]:
#3_LSTM
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# --- Step 1: label-encode the target, but only when it holds strings ---------
if y_train.dtype == 'object':
    le = LabelEncoder().fit(y_train)
    y_train = le.transform(y_train)
    y_test = le.transform(y_test)

# --- Step 2: one-hot encode features; test gets the training column layout ---
train_dummies = pd.get_dummies(X_train)
test_dummies = pd.get_dummies(X_test)
X_train_encoded, X_test_encoded = train_dummies.align(
    test_dummies, join='left', axis=1, fill_value=0
)

# --- Step 3: standardise with statistics learned from the training rows only -
scaler = StandardScaler().fit(X_train_encoded)
X_train_scaled = scaler.transform(X_train_encoded)
X_test_scaled = scaler.transform(X_test_encoded)

# --- Step 4: add a time axis -> [samples, timesteps, features] ---------------
# timesteps is fixed at 1 here; change this if multi-step sequences are needed.
X_train_lstm = np.expand_dims(X_train_scaled, axis=1)
X_test_lstm = np.expand_dims(X_test_scaled, axis=1)

# --- Step 5: single LSTM layer followed by dropout and a sigmoid output ------
model = Sequential([
    LSTM(64, input_shape=X_train_lstm.shape[1:], activation='tanh'),
    Dropout(0.3),
    Dense(1, activation='sigmoid'),  # binary classification; use softmax for multi-class
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# --- Step 6: train; the test split is passed as validation data and no -------
# callbacks are registered, so it only feeds the per-epoch validation metrics.
history = model.fit(
    X_train_lstm, y_train,
    validation_data=(X_test_lstm, y_test),
    batch_size=16,
    epochs=20,
    verbose=1,
)

# --- Step 7: threshold the predicted probabilities at 0.5 --------------------
y_pred_prob = model.predict(X_test_lstm)
y_pred = (y_pred_prob > 0.5).astype(int)

# --- Step 8: report, confusion matrix, accuracy ------------------------------
lstm_cm = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

print("📊 LSTM Report:")
print(classification_report(y_test, y_pred))

sns.heatmap(lstm_cm, annot=True, fmt='d', cmap='Oranges')
plt.title("LSTM Confusion Matrix")
plt.show()

print("Accuracy:", accuracy)

In [None]:
#4_GRU Model Code
import numpy as np
import tensorflow as tf
import random

# 1. Fix seeds for reproducibility (NumPy, TensorFlow and the stdlib RNG)
seed = 42
np.random.seed(seed)
tf.random.set_seed(seed)
random.seed(seed)

# 2. Imports used by this cell
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Embedding, GRU, Dense, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# 3. Toy sentiment dataset: 20 positive followed by 20 negative sentences
texts = [
    # Positive examples (20)
    'this is a great movie', 'i really enjoyed this film', 'the movie was fantastic', 'absolutely brilliant plot',
    'i loved this movie', 'one of the best films ever', 'superb acting by the cast', 'a truly moving story',
    'highly recommend this picture', 'wonderful experience from start to finish', 'two thumbs up', 'amazing direction',
    'a must-see film for everyone', 'the visuals were stunning', 'excellent screenplay', 'perfect in every way',
    'i was captivated throughout', 'an unforgettable journey', 'the soundtrack was beautiful', 'masterfully crafted',
    # Negative examples (20)
    'this movie was terrible', 'i did not like the film', 'the acting was awful', 'a complete waste of time',
    'i hated every moment of it', 'the plot was boring and predictable', 'worst film of the year', 'do not watch this',
    'a disappointing experience', 'the script was very weak', 'i fell asleep halfway through', 'a total failure',
    'the characters were one-dimensional', 'it was incredibly slow', 'nothing special about it', 'i want my money back',
    'a confusing and messy plot', 'the ending was horrible', 'poorly executed idea', 'I would not recommend it'
]
labels = np.array([1]*20 + [0]*20)

# Turn each sentence into a list of word ids and right-pad to the longest one.
tokenizer = Tokenizer(num_words=2000, oov_token="<unk>")
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
max_length = max(len(seq) for seq in sequences)
padded_sequences = pad_sequences(sequences, maxlen=max_length, padding='post')

# The split is stored under text-specific names. This cell previously assigned
# to X_train / X_test / y_train / y_test, replacing the crypto split those
# names hold elsewhere in the notebook, so every later cell that read them
# (e.g. the XGBoost cell) silently trained on these padded token ids instead.
text_X_train, text_X_test, text_y_train, text_y_test = train_test_split(
    padded_sequences, labels, test_size=0.2, random_state=seed
)

# +1 because word ids start at 1; index 0 is the padding value.
vocab_size = len(tokenizer.word_index) + 1
embedding_dim = 32

# Likewise use GRU-specific names so the LSTM cell's `model` / `history`
# objects are not replaced.
gru_model = Sequential([
    Input(shape=(max_length,)),
    Embedding(input_dim=vocab_size, output_dim=embedding_dim),
    GRU(units=64),
    Dropout(0.5),
    Dense(units=1, activation='sigmoid')
])

gru_model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

# NOTE(review): early stopping (with restore_best_weights) monitors val_loss on
# the same held-out split that is reported as "test" accuracy below, so that
# figure is likely optimistic — consider a separate validation split.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

gru_history = gru_model.fit(text_X_train, text_y_train,
                            epochs=50,
                            validation_data=(text_X_test, text_y_test),
                            callbacks=[early_stopping],
                            verbose=2)

loss, accuracy = gru_model.evaluate(text_X_test, text_y_test, verbose=0)
print(f"Final Test Accuracy: {accuracy*100:.2f}%")

In [None]:
import pandas as pd

# Wrap raw NumPy arrays in DataFrames so the dtype-based column selection
# below is available; anything that is not an ndarray is left untouched.
X_train = pd.DataFrame(X_train) if isinstance(X_train, np.ndarray) else X_train
X_test = pd.DataFrame(X_test) if isinstance(X_test, np.ndarray) else X_test

# Object-dtype columns are treated as the categorical ones.
categorical_cols = X_train.select_dtypes(include=['object']).columns

if categorical_cols.empty:
    # Nothing to encode: reuse the frames as they are.
    X_train_encoded, X_test_encoded = X_train, X_test
else:
    # One-hot encode only the categorical columns, then give the test frame
    # the training column layout (columns missing from test are filled with 0).
    train_dummies = pd.get_dummies(X_train, columns=categorical_cols)
    test_dummies = pd.get_dummies(X_test, columns=categorical_cols)
    X_train_encoded, X_test_encoded = train_dummies.align(
        test_dummies, join='left', axis=1, fill_value=0
    )

In [None]:
#5_XGBoost
import xgboost as xgb
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, roc_auc_score
import seaborn as sns
import matplotlib.pyplot as plt

# One-hot encode object-dtype columns only; numeric columns pass through as-is.
categorical_cols = X_train.select_dtypes(include=['object']).columns
if len(categorical_cols) > 0:
    X_train_encoded = pd.get_dummies(X_train, columns=categorical_cols)
    X_test_encoded = pd.get_dummies(X_test, columns=categorical_cols)
    # Give the test frame the training column layout (missing columns become 0).
    X_train_encoded, X_test_encoded = X_train_encoded.align(X_test_encoded, join='left', axis=1, fill_value=0)
else:
    X_train_encoded, X_test_encoded = X_train, X_test

# Reserve the last 20% of the training rows as a validation set for early
# stopping. Previously the test set itself decided when boosting stopped and
# was then scored, which leaks test information into model selection.
n_val = max(1, int(len(X_train_encoded) * 0.2))
train_labels = pd.Series(y_train).to_numpy()  # works for a Series or an ndarray

# Convert to DMatrix
dtrain = xgb.DMatrix(X_train_encoded.iloc[:-n_val], label=train_labels[:-n_val])
dval = xgb.DMatrix(X_train_encoded.iloc[-n_val:], label=train_labels[-n_val:])
dtest = xgb.DMatrix(X_test_encoded, label=y_test)

# Parameters
params = {
    'objective': 'binary:logistic',
    'eval_metric': 'logloss',
    'eta': 0.1,
    'max_depth': 6,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'seed': 42
}

# Train model; boosting stops once validation logloss has not improved for 20 rounds.
xgb_model = xgb.train(params, dtrain, num_boost_round=200,
                      evals=[(dval, "Validation")], early_stopping_rounds=20)

# Predict with the trees up to the best iteration only: the native
# Booster.predict() otherwise uses every tree, including the rounds added
# after the validation score stopped improving.
best_rounds = xgb_model.best_iteration + 1
xgb_preds_prob = xgb_model.predict(dtest, iteration_range=(0, best_rounds))
xgb_preds = (xgb_preds_prob > 0.5).astype(int)

# Evaluation on the untouched test set
print("📊 XGBoost Report:")
print(classification_report(y_test, xgb_preds))

xgb_cm = confusion_matrix(y_test, xgb_preds)
sns.heatmap(xgb_cm, annot=True, fmt='d', cmap='Greens')
plt.title("XGBoost Confusion Matrix")
plt.show()

accuracy = accuracy_score(y_test, xgb_preds)
# ROC-AUC is computed from the probabilities, not the thresholded labels.
roc_auc = roc_auc_score(y_test, xgb_preds_prob)
print(f"Accuracy: {accuracy:.4f}")
print(f"ROC-AUC: {roc_auc:.4f}")