<a href="https://colab.research.google.com/github/sanskruti-1234/Deep-Learning/blob/main/Practical_5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense, Dropout
from tensorflow.keras.utils import set_random_seed


In [6]:
set_random_seed(42)

In [7]:
try:
    df = pd.read_csv('process_data - process_data.csv')
    print("Data loaded successfully. Initial rows:")
    print(df.head())
except FileNotFoundError:
    print("Error: 'process_data - process_data.csv' not found. Please ensure the file is accessible.")
    exit()

Data loaded successfully. Initial rows:
       Gene     Sample  FPKM description        tissue metastasis
0    TSPAN6  CA.102548  0.93   CA.102548  breast tumor        yes
1      TNMD  CA.102548  0.00   CA.102548  breast tumor        yes
2      DPM1  CA.102548  0.00   CA.102548  breast tumor        yes
3     SCYL3  CA.102548  5.78   CA.102548  breast tumor        yes
4  C1orf112  CA.102548  2.83   CA.102548  breast tumor        yes


In [24]:
data_pivot = df.pivot_table(
    index=['Sample', 'tissue'],
    columns='Gene',
    values='FPKM',
    fill_value=0
).reset_index()

In [9]:
X_data = data_pivot.drop(columns=['Sample', 'tissue'])
y_data_raw = data_pivot['tissue']


In [10]:
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y_data_raw)
print("\nEncoded target classes:", dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_))))


Encoded target classes: {'breast tumor': np.int64(0), 'normal breast tissue': np.int64(1)}


In [12]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_data)
print(f"\nOriginal feature shape (Samples x Genes): {X_scaled.shape}")


Original feature shape (Samples x Genes): (52, 20246)


In [14]:
num_samples = X_scaled.shape[0]
num_timesteps = X_scaled.shape[1]
num_features = 1

X_rnn = X_scaled.reshape(num_samples, num_timesteps, num_features)
print(f"Reshaped RNN input shape (Samples, Timesteps, Features): {X_rnn.shape}")

Reshaped RNN input shape (Samples, Timesteps, Features): (52, 20246, 1)


In [15]:
X_train, X_test, y_train, y_test = train_test_split(
    X_rnn, y, test_size=0.2, random_state=42, stratify=y
)
print(f"Train samples: {X_train.shape[0]}, Test samples: {X_test.shape[0]}")

Train samples: 41, Test samples: 11


In [16]:
model = Sequential([
    SimpleRNN(units=64, input_shape=(num_timesteps, num_features)),
    Dropout(0.3),
    Dense(units=32, activation='relu'),
    Dropout(0.3),
    Dense(units=1, activation='sigmoid')
])

  super().__init__(**kwargs)


In [17]:
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

print("\nModel Summary:")
model.summary()


Model Summary:


In [18]:
history = model.fit(
    X_train,
    y_train,
    epochs=20,
    batch_size=32,
    validation_split=0.1,
    verbose=2
)

Epoch 1/20
2/2 - 10s - 5s/step - accuracy: 0.5556 - loss: 0.7339 - val_accuracy: 0.6000 - val_loss: 0.6107
Epoch 2/20
2/2 - 10s - 5s/step - accuracy: 0.7778 - loss: 0.5899 - val_accuracy: 1.0000 - val_loss: 0.5609
Epoch 3/20
2/2 - 7s - 3s/step - accuracy: 0.6111 - loss: 0.6017 - val_accuracy: 1.0000 - val_loss: 0.5207
Epoch 4/20
2/2 - 11s - 5s/step - accuracy: 0.7778 - loss: 0.5575 - val_accuracy: 1.0000 - val_loss: 0.4820
Epoch 5/20
2/2 - 11s - 5s/step - accuracy: 0.8611 - loss: 0.5402 - val_accuracy: 1.0000 - val_loss: 0.4480
Epoch 6/20
2/2 - 10s - 5s/step - accuracy: 0.8333 - loss: 0.4652 - val_accuracy: 1.0000 - val_loss: 0.4202
Epoch 7/20
2/2 - 7s - 3s/step - accuracy: 0.8333 - loss: 0.4843 - val_accuracy: 1.0000 - val_loss: 0.3919
Epoch 8/20
2/2 - 7s - 4s/step - accuracy: 0.8333 - loss: 0.4597 - val_accuracy: 1.0000 - val_loss: 0.3615
Epoch 9/20
2/2 - 6s - 3s/step - accuracy: 0.8611 - loss: 0.4201 - val_accuracy: 1.0000 - val_loss: 0.3385
Epoch 10/20
2/2 - 11s - 6s/step - accurac

In [20]:
print("\nEvaluating model on test data...")
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")



Evaluating model on test data...
Test Loss: 0.3391
Test Accuracy: 0.9091


In [22]:
y_pred_proba = model.predict(X_test)
y_pred = (y_pred_proba > 0.5).astype(int)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step


In [23]:
from sklearn.metrics import classification_report
print("\nClassification Report on Test Data:")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))



Classification Report on Test Data:
                      precision    recall  f1-score   support

        breast tumor       1.00      0.83      0.91         6
normal breast tissue       0.83      1.00      0.91         5

            accuracy                           0.91        11
           macro avg       0.92      0.92      0.91        11
        weighted avg       0.92      0.91      0.91        11

