# Importing Necessary Libraries

In [31]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Importing the dataset

In [32]:
# Location of the raw results file (presumably a Colab upload - adjust the
# path when running the notebook elsewhere).
DATA_PATH = "/content/results2.csv"
df = pd.read_csv(DATA_PATH)

# Dataset Exploration

In [33]:
# Size of the raw frame as (rows, columns)
df.shape

(21426, 5)

In [34]:
# Preview the first rows to see how the raw values are formatted
df.head()

Unnamed: 0,name,price,change_24h,volume_24h,market_cap
0,LOAF,$0.01846,,$29.05 B,
1,Bitcoin,"$70,857.06",-0.06%,$14.58 B,"$1,396.46 B"
2,Ethereum,"$3,855.52",1.86%,$61.94 B,$463.24 B
3,Tether,$0.9995,0.03%,$2.96 B,$112.34 B
4,BNB,$694.10,-0.55%,$1.99 B,$102.44 B


In [35]:
# Column dtypes, non-null counts and memory footprint
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21426 entries, 0 to 21425
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   name        21426 non-null  object
 1   price       21426 non-null  object
 2   change_24h  21410 non-null  object
 3   volume_24h  8631 non-null   object
 4   market_cap  4427 non-null   object
dtypes: object(5)
memory usage: 837.1+ KB


In [36]:
# Summary statistics for each column
df.describe()

Unnamed: 0,name,price,change_24h,volume_24h,market_cap
count,21426,21426,21410,8631,4427
unique,20866,17274,2950,6386,3809
top,MAGA,$1.00,0.00%,$2,$1.29 M
freq,14,47,12143,69,10


In [43]:
# Number of missing entries in each column
df.isna().sum()

name              0
price             0
change_24h       16
volume_24h    12795
market_cap    16999
dtype: int64

In [37]:
# Storage type of each column
df.dtypes

name          object
price         object
change_24h    object
volume_24h    object
market_cap    object
dtype: object

# Converting to Appropriate Datatypes

In [38]:
# Convert price to float: drop the thousands separators and the '$' prefix.
# regex=False makes '$' a literal character on every pandas version (read as
# a regular expression it would be the end-of-string anchor), and astype(str)
# keeps this cell re-runnable once the column has already become numeric.
df['price'] = pd.to_numeric(
    df['price'].astype(str)
    .str.replace(',', '', regex=False)
    .str.replace('$', '', regex=False),
    errors='coerce',
)

# Convert change_24h to float (strip the trailing '%'); anything that does not
# parse as a number, including missing cells, ends up as NaN.
df['change_24h'] = pd.to_numeric(
    df['change_24h'].astype(str).str.replace('%', '', regex=False),
    errors='coerce',
)

# Convert volume_24h to float (strip '$' and commas, expand the unit suffix)
def convert_volume(value):
    """Parse an abbreviated dollar string such as ``"$29.05 B"`` into a float.

    Parameters
    ----------
    value : str, number, None or pd.NA
        Text made of an optional ``$``, digits with optional thousands
        separators, and an optional unit suffix ``K``, ``M``, ``B`` or ``T``
        (upper or lower case). Numbers are returned unchanged as floats, and
        ``None`` / ``pd.NA`` / NaN yield NaN.

    Returns
    -------
    float
        The amount in plain dollars, e.g. ``"$1.29 M"`` -> ``1290000.0``.

    Raises
    ------
    ValueError
        If the text left after removing ``$``, commas and the suffix is not
        a valid number (for example an empty string).
    """
    # Non-strings are passed through so the conversion cell can be re-run on
    # a column that already holds floats; float(nan) stays NaN.
    if not isinstance(value, str):
        if value is None or value is pd.NA:
            return float('nan')
        return float(value)

    multipliers = {'K': 1e3, 'M': 1e6, 'B': 1e9, 'T': 1e12}
    text = value.replace('$', '').replace(',', '').strip()

    # Slicing (rather than indexing) keeps an empty string from raising
    # IndexError; it falls through to float('') and raises ValueError instead.
    suffix = text[-1:].upper()
    if suffix in multipliers:
        return float(text[:-1]) * multipliers[suffix]
    return float(text)

# na_action='ignore' skips missing cells, so they stay NaN and never reach
# convert_volume.
df['volume_24h'] = df['volume_24h'].map(convert_volume, na_action='ignore')

# Convert market_cap to float (same '$' / comma / unit-suffix layout as volume_24h)
def convert_market_cap(value):
    """Parse an abbreviated dollar string from ``market_cap`` into a float.

    The raw ``market_cap`` strings use the same layout as ``volume_24h``
    (for example ``"$463.24 B"``), so this delegates to ``convert_volume``
    rather than carrying a second copy of the parsing rules.

    Parameters
    ----------
    value : str
        Raw market-cap text.

    Returns
    -------
    float
        The amount in plain dollars.
    """
    return convert_volume(value)

# na_action='ignore' skips missing cells, so they stay NaN and never reach
# convert_market_cap.
df['market_cap'] = df['market_cap'].map(convert_market_cap, na_action='ignore')


In [39]:
# Check the column types after the conversions
df.dtypes

name           object
price         float64
change_24h    float64
volume_24h    float64
market_cap    float64
dtype: object

In [40]:
# Spot-check the converted values on the first rows
df.head()

Unnamed: 0,name,price,change_24h,volume_24h,market_cap
0,LOAF,0.01846,,29050000000.0,
1,Bitcoin,70857.06,-0.06,14580000000.0,1396460000000.0
2,Ethereum,3855.52,1.86,61940000000.0,463240000000.0
3,Tether,0.9995,0.03,2960000000.0,112340000000.0
4,BNB,694.1,-0.55,1990000000.0,102440000000.0


# Handling Missing Values

In [41]:
from sklearn.impute import SimpleImputer

In [45]:
# Only float64 / int64 columns are candidates for imputation
numeric_columns = df.select_dtypes(include=['float64', 'int64']).columns

# Replace each gap with the mean of its own column.
# NOTE(review): the imputer is fitted on the whole frame, i.e. before the
# train/test split further down, so the fill values are computed from rows
# that later land in the test set - confirm this is acceptable.
imputer = SimpleImputer(strategy='mean')
imputed_values = imputer.fit_transform(df[numeric_columns])
df[numeric_columns] = imputed_values

In [46]:
# Check how many missing entries remain in each column
df.isna().sum()

name          0
price         0
change_24h    0
volume_24h    0
market_cap    0
dtype: int64

In [47]:
# Preview the first rows after imputation
df.head()

Unnamed: 0,name,price,change_24h,volume_24h,market_cap
0,LOAF,0.01846,3.928317,29050000000.0,636165700.0
1,Bitcoin,70857.06,-0.06,14580000000.0,1396460000000.0
2,Ethereum,3855.52,1.86,61940000000.0,463240000000.0
3,Tether,0.9995,0.03,2960000000.0,112340000000.0
4,BNB,694.1,-0.55,1990000000.0,102440000000.0


# Model Building

In [52]:
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam

# Seed Python, NumPy and TensorFlow in one call so that weight initialisation,
# dropout and batch shuffling repeat from run to run (as far as the backend
# allows). The same seed drives the train/test split below.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Features and target are taken from `df` as prepared by the cells above
X = df[['change_24h', 'volume_24h', 'market_cap']]
y = df['price']

# Hold out 20% of the rows for the final evaluation
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED)

# Standardize the features; the scaler is fitted on the training rows only
# and then reused unchanged on the test rows
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the neural network architecture
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)),
    Dropout(0.2),  # dropout layer for regularization
    Dense(32, activation='relu'),
    Dropout(0.2),
    Dense(1)  # Output layer (single neuron for regression)
])

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')

# Train the model; validation_split carves a further 20% out of the training
# rows for per-epoch validation
history = model.fit(X_train_scaled, y_train, batch_size=32, epochs=50, validation_split=0.2, verbose=1)

# Evaluate the model on the test set (the value is the MSE the model was compiled with)
loss = model.evaluate(X_test_scaled, y_test, verbose=0)
print(f'Test Loss: {loss}')

# Save the trained model.
# NOTE(review): the save call emits a warning in the recorded output; Keras
# appears to treat .h5 as a legacy format - consider '.keras' once nothing
# else depends on this filename.
model.save('crypto_prediction_model.h5')


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Test Loss: 7939258880.0


  saving_api.save_model(


# Evaluation Metrics

In [53]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Predict once on the held-out features; every metric below scores these
# predictions against y_test
y_pred = model.predict(X_test_scaled)

mae = mean_absolute_error(y_test, y_pred)   # mean absolute error
mse = mean_squared_error(y_test, y_pred)    # mean squared error
rmse = np.sqrt(mse)                         # root of the MSE
r2 = r2_score(y_test, y_pred)               # coefficient of determination

# Assemble the report first, then emit it in one go
metric_lines = [
    f'Mean Absolute Error (MAE): {mae}',
    f'Mean Squared Error (MSE): {mse}',
    f'Root Mean Squared Error (RMSE): {rmse}',
    f'R-squared value: {r2}',
]
print('\n'.join(metric_lines))


Mean Absolute Error (MAE): 2242.3915786181633
Mean Squared Error (MSE): 7939263027.670171
Root Mean Squared Error (RMSE): 89102.54220655082
R-squared value: -0.00010653201091548326


In [57]:
# Write the current state of df to disk, leaving out the index column
CLEANED_CSV_PATH = 'cleaned_data.csv'
df.to_csv(CLEANED_CSV_PATH, index=False)