In [3]:
import pandas as pd
df = pd.read_csv('49_updated.csv')
df = df.drop('TIME', axis=1)

In [4]:
df.head()

Unnamed: 0,a,b,c,d,e,f,g,h,i,j,k,l,m,o,group_status
0,107.136536,100.0,134.133804,2.854385,576.762146,99.946281,44.625851,46.853638,104.717056,106.523125,107.030479,131.141632,1.689493,1,1
1,105.134583,100.0,160.002411,3.367386,510.683624,107.427765,54.109188,-0.69275,108.977722,97.88475,98.32534,135.435562,1.835044,1,1
2,158.74939,100.0,177.491074,4.161911,489.616302,111.045418,51.477051,-0.69275,112.985031,127.458191,127.96553,138.692169,2.040076,1,1
3,218.557755,100.0,230.25087,6.420364,500.774811,120.365662,55.490112,-0.585938,118.745735,120.074837,120.488739,158.166794,2.533844,1,1
4,207.734665,100.0,236.822556,6.05751,497.256439,124.775787,56.008911,-0.585938,125.489731,119.607536,120.061485,156.594208,2.461201,1,1


In [11]:
!pip install -U huggingface_hub

Collecting huggingface_hub
  Downloading huggingface_hub-0.26.2-py3-none-any.whl.metadata (13 kB)
Downloading huggingface_hub-0.26.2-py3-none-any.whl (447 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m447.5/447.5 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0mm
[?25hInstalling collected packages: huggingface_hub
  Attempting uninstall: huggingface_hub
    Found existing installation: huggingface-hub 0.20.3
    Uninstalling huggingface-hub-0.20.3:
      Successfully uninstalled huggingface-hub-0.20.3
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
llava 1.1.3 requires pydantic<2,>=1, but you have pydantic 2.9.2 which is incompatible.
videollava 1.0.0 requires gradio==3.37.0, but you have gradio 3.35.2 which is incompatible.
videollava 1.0.0 requires gradio-client==0.7.0, but you have gradio-client 0.2.9 which is incompat

In [14]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, f1_score

# Load and preprocess the data
data = pd.read_csv('49_updated.csv')
features = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'l', 'k', 'm']
target = 'group_status'

# Normalize features
scaler = StandardScaler()
data[features] = scaler.fit_transform(data[features])

# Prepare sequences of 10 rows
sequence_length = 10

def create_sequences(data, sequence_length, target_col):
    sequences = []
    labels = []
    for i in range(len(data) - sequence_length):
        seq = data[features].iloc[i:i+sequence_length].values
        label = data[target_col].iloc[i + sequence_length - 1]
        sequences.append(seq)
        labels.append(label)
    return np.array(sequences), np.array(labels)

sequences, labels = create_sequences(data, sequence_length, target)

# Split into train and test sets
split_ratio = 0.8
split_index = int(split_ratio * len(sequences))
train_sequences, test_sequences = sequences[:split_index], sequences[split_index:]
train_labels, test_labels = labels[:split_index], labels[split_index:]

# Custom Dataset
class TimeSeriesDataset(Dataset):
    def __init__(self, sequences, labels):
        self.sequences = torch.tensor(sequences, dtype=torch.float32)
        self.labels = torch.tensor(labels, dtype=torch.float32)
    
    def __len__(self):
        return len(self.sequences)
    
    def __getitem__(self, idx):
        return self.sequences[idx], self.labels[idx]

train_dataset = TimeSeriesDataset(train_sequences, train_labels)
test_dataset = TimeSeriesDataset(test_sequences, test_labels)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Transformer model with an embedding layer to adjust input dimensions
class TransformerClassifier(nn.Module):
    def __init__(self, input_dim, emb_dim, n_heads, hidden_dim, n_layers):
        super(TransformerClassifier, self).__init__()
        # Embedding layer to adjust input dimension
        self.embedding = nn.Linear(input_dim, emb_dim)
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=emb_dim, nhead=n_heads, dim_feedforward=hidden_dim)
        self.transformer = nn.TransformerEncoder(self.encoder_layer, num_layers=n_layers)
        self.fc = nn.Linear(emb_dim, 1)
    
    def forward(self, x):
        x = self.embedding(x)  # Adjust to emb_dim
        x = self.transformer(x)
        x = x.mean(dim=1)
        x = self.fc(x)
        return torch.sigmoid(x)

# Model parameters
input_dim = len(features)
emb_dim = 16  # Choose an embedding dimension divisible by n_heads
n_heads = 4
hidden_dim = 64
n_layers = 2
model = TransformerClassifier(input_dim=input_dim, emb_dim=emb_dim, n_heads=n_heads, hidden_dim=hidden_dim, n_layers=n_layers)

# Training setup
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
def train_model(model, train_loader, criterion, optimizer, epochs=10):
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for sequences, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(sequences)
            loss = criterion(outputs.squeeze(), labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss / len(train_loader):.4f}")

# Evaluation function
def evaluate_model(model, test_loader):
    model.eval()
    predictions, targets = [], []
    with torch.no_grad():
        for sequences, labels in test_loader:
            outputs = model(sequences)
            predictions.extend((outputs.squeeze() > 0.5).int().numpy())
            targets.extend(labels.numpy())
    accuracy = accuracy_score(targets, predictions)
    f1 = f1_score(targets, predictions)
    print(f"Test Accuracy: {accuracy:.4f}, F1 Score: {f1:.4f}")

# Train and evaluate
train_model(model, train_loader, criterion, optimizer, epochs=10)
evaluate_model(model, test_loader)


Epoch 1/10, Loss: 0.2748
Epoch 2/10, Loss: 0.2452
Epoch 3/10, Loss: 0.2337
Epoch 4/10, Loss: 0.2284
Epoch 5/10, Loss: 0.2244
Epoch 6/10, Loss: 0.2219
Epoch 7/10, Loss: 0.2206
Epoch 8/10, Loss: 0.2191
Epoch 9/10, Loss: 0.2182
Epoch 10/10, Loss: 0.2168
Test Accuracy: 0.9360, F1 Score: 0.9596


In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, f1_score

# Load and preprocess the training data
data = pd.read_csv('49_updated.csv')
features = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'l', 'k', 'm']
target = 'group_status'

# Normalize features
scaler = StandardScaler()
data[features] = scaler.fit_transform(data[features])

# Prepare sequences of 10 rows
sequence_length = 10

def create_sequences(data, sequence_length, target_col):
    sequences = []
    labels = []
    for i in range(len(data) - sequence_length):
        seq = data[features].iloc[i:i+sequence_length].values
        label = data[target_col].iloc[i + sequence_length - 1]
        sequences.append(seq)
        labels.append(label)
    return np.array(sequences), np.array(labels)

sequences, labels = create_sequences(data, sequence_length, target)

# Split into train and test sets
split_ratio = 0.8
split_index = int(split_ratio * len(sequences))
train_sequences, test_sequences = sequences[:split_index], sequences[split_index:]
train_labels, test_labels = labels[:split_index], labels[split_index:]

# Custom Dataset
class TimeSeriesDataset(Dataset):
    def __init__(self, sequences, labels):
        self.sequences = torch.tensor(sequences, dtype=torch.float32)
        self.labels = torch.tensor(labels, dtype=torch.float32)
    
    def __len__(self):
        return len(self.sequences)
    
    def __getitem__(self, idx):
        return self.sequences[idx], self.labels[idx]

train_dataset = TimeSeriesDataset(train_sequences, train_labels)
test_dataset = TimeSeriesDataset(test_sequences, test_labels)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Transformer model with an embedding layer to adjust input dimensions
class TransformerClassifier(nn.Module):
    def __init__(self, input_dim, emb_dim, n_heads, hidden_dim, n_layers):
        super(TransformerClassifier, self).__init__()
        self.embedding = nn.Linear(input_dim, emb_dim)
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=emb_dim, nhead=n_heads, dim_feedforward=hidden_dim)
        self.transformer = nn.TransformerEncoder(self.encoder_layer, num_layers=n_layers)
        self.fc = nn.Linear(emb_dim, 1)
    
    def forward(self, x):
        x = self.embedding(x)
        x = self.transformer(x)
        x = x.mean(dim=1)
        x = self.fc(x)
        return torch.sigmoid(x)

# Model parameters
input_dim = len(features)
emb_dim = 16
n_heads = 4
hidden_dim = 64
n_layers = 2
model = TransformerClassifier(input_dim=input_dim, emb_dim=emb_dim, n_heads=n_heads, hidden_dim=hidden_dim, n_layers=n_layers)

# Training setup
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training loop
def train_model(model, train_loader, criterion, optimizer, epochs=10):
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for sequences, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(sequences)
            loss = criterion(outputs.squeeze(), labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss / len(train_loader):.4f}")

# Evaluation function
def evaluate_model(model, test_loader):
    model.eval()
    predictions, targets = [], []
    with torch.no_grad():
        for sequences, labels in test_loader:
            outputs = model(sequences)
            predictions.extend((outputs.squeeze() > 0.5).int().numpy())
            targets.extend(labels.numpy())
    accuracy = accuracy_score(targets, predictions)
    f1 = f1_score(targets, predictions)
    print(f"Test Accuracy: {accuracy:.4f}, F1 Score: {f1:.4f}")

# Train and save the model
train_model(model, train_loader, criterion, optimizer, epochs=10)
evaluate_model(model, test_loader)
torch.save(model.state_dict(), 'transformer_classifier.pth')
print("Model saved as transformer_classifier.pth")

# Function to load and predict on a new dataset
def predict_new_data(file_path, model, scaler):
    # Load and preprocess new data
    new_data = pd.read_csv(file_path)
    new_data[features] = scaler.transform(new_data[features])  # Normalize using the same scaler
    
    # Prepare sequences for prediction
    new_sequences = []
    for i in range(len(new_data) - sequence_length):
        seq = new_data[features].iloc[i:i+sequence_length].values
        new_sequences.append(seq)
    new_sequences = np.array(new_sequences)

    # Prepare DataLoader for new data
    new_dataset = TimeSeriesDataset(new_sequences, np.zeros(len(new_sequences)))  # Use zeros as dummy labels
    new_loader = DataLoader(new_dataset, batch_size=32, shuffle=False)
    
    # Predict
    model.eval()
    all_predictions = []
    with torch.no_grad():
        for sequences, _ in new_loader:
            outputs = model(sequences)
            predictions = (outputs.squeeze() > 0.5).int().numpy()
            all_predictions.extend(predictions)
    
    return all_predictions

# Load the saved model
model = TransformerClassifier(input_dim=input_dim, emb_dim=emb_dim, n_heads=n_heads, hidden_dim=hidden_dim, n_layers=n_layers)
model.load_state_dict(torch.load('transformer_classifier.pth'))
print("Model loaded for prediction.")






Epoch 1/10, Loss: 0.2760
Epoch 2/10, Loss: 0.2441
Epoch 3/10, Loss: 0.2336
Epoch 4/10, Loss: 0.2274
Epoch 5/10, Loss: 0.2237
Epoch 6/10, Loss: 0.2210
Epoch 7/10, Loss: 0.2192
Epoch 8/10, Loss: 0.2161
Epoch 9/10, Loss: 0.2154
Epoch 10/10, Loss: 0.2140
Test Accuracy: 0.9340, F1 Score: 0.9583
Model saved as transformer_classifier.pth
Model loaded for prediction.


FileNotFoundError: [Errno 2] No such file or directory: 'new_data.csv'

In [23]:
# Predict on a new CSV file
new_predictions = predict_new_data('46.csv', model, scaler)
print("Predictions on new data:", new_predictions)

Predictions on new data: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
