In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
from datasets import load_dataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from tqdm import tqdm

# Load the TweetEval dataset
dataset = load_dataset("tweet_eval", "sentiment")

# Prepare the data
texts = dataset["train"]["text"] + dataset["test"]["text"]
labels = dataset["train"]["label"] + dataset["test"]["label"]

# Split the data
train_texts, val_texts, train_labels, val_labels = train_test_split(texts, labels, test_size=0.2, random_state=42)

# Initialize the BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

class TweetDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_length=128):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = str(self.texts[idx])
        label = self.labels[idx]

        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_length,
            return_token_type_ids=False,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )

        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(label, dtype=torch.long)
        }

# Create datasets
train_dataset = TweetDataset(train_texts, train_labels, tokenizer)
val_dataset = TweetDataset(val_texts, val_labels, tokenizer)

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)

# Initialize the model
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=3)

# Set up the optimizer
optimizer = AdamW(model.parameters(), lr=2e-5)

# Training loop
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

num_epochs = 3

for epoch in range(num_epochs):
    model.train()
    total_loss = 0

    for batch in tqdm(train_loader, desc=f"Epoch {epoch + 1}/{num_epochs}"):
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        optimizer.zero_grad()
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        total_loss += loss.item()

        loss.backward()
        optimizer.step()

    avg_train_loss = total_loss / len(train_loader)
    print(f"Average train loss: {avg_train_loss:.4f}")

    # Validation
    model.eval()
    val_preds, val_true = [], []

    with torch.no_grad():
        for batch in tqdm(val_loader, desc="Validation"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids, attention_mask=attention_mask)
            preds = torch.argmax(outputs.logits, dim=1)

            val_preds.extend(preds.cpu().numpy())
            val_true.extend(labels.cpu().numpy())

    val_accuracy = accuracy_score(val_true, val_preds)
    print(f"Validation Accuracy: {val_accuracy:.4f}")
    print(classification_report(val_true, val_preds))

# Save the model
model.save_pretrained("tweet_authorship_model")
tokenizer.save_pretrained("tweet_authorship_model")

print("Training completed and model saved.")

  from .autonotebook import tqdm as notebook_tqdm
Downloading readme: 100%|██████████| 23.9k/23.9k [00:00<00:00, 24.0MB/s]
Downloading data: 100%|██████████| 3.78M/3.78M [00:01<00:00, 3.34MB/s]
Downloading data: 100%|██████████| 901k/901k [00:00<00:00, 1.03MB/s]
Downloading data: 100%|██████████| 167k/167k [00:00<00:00, 751kB/s]
Generating train split: 100%|██████████| 45615/45615 [00:00<00:00, 611435.88 examples/s]
Generating test split: 100%|██████████| 12284/12284 [00:00<00:00, 1501116.75 examples/s]
Generating validation split: 100%|██████████| 2000/2000 [00:00<00:00, 630390.62 examples/s]
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initia

Average train loss: 0.6622


Validation: 100%|██████████| 724/724 [18:42<00:00,  1.55s/it]


Validation Accuracy: 0.7161
              precision    recall  f1-score   support

           0       0.72      0.63      0.67      2258
           1       0.75      0.64      0.69      5322
           2       0.68      0.86      0.76      4000

    accuracy                           0.72     11580
   macro avg       0.72      0.71      0.71     11580
weighted avg       0.72      0.72      0.71     11580



Epoch 2/3: 100%|██████████| 2895/2895 [3:32:49<00:00,  4.41s/it]  


Average train loss: 0.4794


Validation: 100%|██████████| 724/724 [18:41<00:00,  1.55s/it]


Validation Accuracy: 0.7276
              precision    recall  f1-score   support

           0       0.71      0.62      0.66      2258
           1       0.69      0.79      0.74      5322
           2       0.80      0.71      0.75      4000

    accuracy                           0.73     11580
   macro avg       0.73      0.70      0.72     11580
weighted avg       0.73      0.73      0.73     11580



Epoch 3/3: 100%|██████████| 2895/2895 [3:35:18<00:00,  4.46s/it]  


Average train loss: 0.2939


Validation: 100%|██████████| 724/724 [18:45<00:00,  1.55s/it]


Validation Accuracy: 0.7133
              precision    recall  f1-score   support

           0       0.65      0.72      0.68      2258
           1       0.70      0.72      0.71      5322
           2       0.78      0.70      0.74      4000

    accuracy                           0.71     11580
   macro avg       0.71      0.71      0.71     11580
weighted avg       0.72      0.71      0.71     11580

Training completed and model saved.


In [1]:
#!pip install transformers datasets torch scikit-learn pandas nltk

import pandas as pd
import numpy as np
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import torch
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import re

# Download NLTK data
nltk.download('punkt')
nltk.download('stopwords')

# Load the Reddit dataset
dataset = load_dataset("reddit", split="train[:100000]", trust_remote_code=True)  # Limiting to 100k samples for this example

# Convert to pandas DataFrame for easier preprocessing
df = pd.DataFrame(dataset)

# Preprocessing function
def preprocess_text(text):
    # Convert to lowercase
    text = text.lower()
    # Remove special characters and numbers
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    # Tokenize
    tokens = word_tokenize(text)
    # Remove stopwords
    stop_words = set(stopwords.words('english'))
    tokens = [word for word in tokens if word not in stop_words]
    # Join tokens back into string
    return ' '.join(tokens)

# Apply preprocessing
df['processed_text'] = df['content']

# Use 'author' as our target for authorship attribution
# Keep only authors with at least 50 comments
author_counts = df['author'].value_counts()
authors_to_keep = author_counts[author_counts >= 8].index
df = df[df['author'].isin(authors_to_keep)]

# Encode author labels
le = LabelEncoder()
df['author_encoded'] = le.fit_transform(df['author'])

# When splitting the data
train_texts, test_texts, train_labels, test_labels = train_test_split(
    df['processed_text'], df['author_encoded'], 
    test_size=0.2, random_state=42, stratify=df['author_encoded']
)

# Further split train into train and validation
train_texts, val_texts, train_labels, val_labels = train_test_split(
    train_texts, train_labels, 
    test_size=0.1, random_state=42, stratify=train_labels
)

# Tokenizer
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

def tokenize_function(examples):
    return tokenizer(examples, padding="max_length", truncation=True, max_length=128)

train_encodings = tokenize_function(train_texts.tolist())
val_encodings = tokenize_function(val_texts.tolist())
test_encodings = tokenize_function(test_texts.tolist())

# Dataset class
class RedditDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

train_dataset = RedditDataset(train_encodings, train_labels.tolist())
val_dataset = RedditDataset(val_encodings, val_labels.tolist())
test_dataset = RedditDataset(test_encodings, test_labels.tolist())

# Model
model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=len(le.classes_))

# Training arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=2,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=64,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    evaluation_strategy="steps",
    eval_steps=500,
    save_steps=1000,
    load_best_model_at_end=True,
)

# Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset
)

# Train the model
trainer.train()

# Evaluate the model
predictions = trainer.predict(test_dataset)
preds = np.argmax(predictions.predictions, axis=-1)

from sklearn.metrics import accuracy_score, classification_report
print(accuracy_score(test_labels, preds))
print(classification_report(test_labels, preds, target_names=le.classes_))

  from .autonotebook import tqdm as notebook_tqdm
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\mcant\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\mcant\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  8%|▊         | 10/118 [00:50<08:17,  4.60s/it]

{'loss': 4.8421, 'grad_norm': 6.978228569030762, 'learning_rate': 1.0000000000000002e-06, 'epoch': 0.17}


 17%|█▋        | 20/118 [01:35<07:22,  4.51s/it]

{'loss': 4.858, 'grad_norm': 6.721885681152344, 'learning_rate': 2.0000000000000003e-06, 'epoch': 0.34}


 25%|██▌       | 30/118 [02:21<06:32,  4.46s/it]

{'loss': 4.8734, 'grad_norm': 5.634627342224121, 'learning_rate': 3e-06, 'epoch': 0.51}


 34%|███▍      | 40/118 [03:07<05:52,  4.52s/it]

{'loss': 4.8622, 'grad_norm': 5.481440544128418, 'learning_rate': 4.000000000000001e-06, 'epoch': 0.68}


 42%|████▏     | 50/118 [03:53<05:12,  4.60s/it]

{'loss': 4.8103, 'grad_norm': 6.180174350738525, 'learning_rate': 5e-06, 'epoch': 0.85}


 51%|█████     | 60/118 [04:36<03:48,  3.94s/it]

{'loss': 4.788, 'grad_norm': 8.272601127624512, 'learning_rate': 6e-06, 'epoch': 1.02}


 59%|█████▉    | 70/118 [05:22<03:37,  4.53s/it]

{'loss': 4.7908, 'grad_norm': 9.912836074829102, 'learning_rate': 7.000000000000001e-06, 'epoch': 1.19}


 68%|██████▊   | 80/118 [06:08<02:53,  4.58s/it]

{'loss': 4.7754, 'grad_norm': 7.177762031555176, 'learning_rate': 8.000000000000001e-06, 'epoch': 1.36}


 76%|███████▋  | 90/118 [06:53<02:06,  4.52s/it]

{'loss': 4.8321, 'grad_norm': 5.76736307144165, 'learning_rate': 9e-06, 'epoch': 1.53}


 85%|████████▍ | 100/118 [07:39<01:21,  4.53s/it]

{'loss': 4.789, 'grad_norm': 6.290131568908691, 'learning_rate': 1e-05, 'epoch': 1.69}


 93%|█████████▎| 110/118 [08:24<00:36,  4.57s/it]

{'loss': 4.7866, 'grad_norm': 8.413372039794922, 'learning_rate': 1.1000000000000001e-05, 'epoch': 1.86}


100%|██████████| 118/118 [08:58<00:00,  4.56s/it]


{'train_runtime': 538.1207, 'train_samples_per_second': 3.464, 'train_steps_per_second': 0.219, 'train_loss': 4.816403114189536, 'epoch': 2.0}


100%|██████████| 5/5 [00:20<00:00,  4.01s/it]


0.03424657534246575


ValueError: Number of classes, 114, does not match size of target_names, 123. Try specifying the labels parameter

In [3]:
from sklearn.metrics import accuracy_score, classification_report

# After making predictions
predictions = trainer.predict(test_dataset)
preds = np.argmax(predictions.predictions, axis=-1)

# Get the unique classes in our test set
unique_classes = np.unique(test_labels)

# Create a mapping from the original label encoder to the classes in our test set
label_map = {i: le.classes_[i] for i in unique_classes}

# Generate the classification report
print(accuracy_score(test_labels, preds))
print(classification_report(test_labels, preds, 
                            target_names=[label_map[i] for i in sorted(label_map.keys())],
                            labels=sorted(label_map.keys())))

# If you want to see which authors are in the test set
print("Authors in test set:")
for i, author in label_map.items():
    print(f"Label {i}: {author}")

100%|██████████| 5/5 [00:18<00:00,  3.63s/it]

0.03424657534246575
                      precision    recall  f1-score   support

      A_Polite_Noise       0.00      0.00      0.00         3
           Anomander       0.00      0.00      0.00         3
            BZenMojo       0.00      0.00      0.00         1
          Batty-Koda       0.00      0.00      0.00         5
       Blenderhead36       0.00      0.00      0.00         2
   BluepillProfessor       0.00      0.00      0.00         1
   BuildMyPaperHeart       0.00      0.00      0.00         1
           CaspianX2       0.00      0.00      0.00         2
     Cebus_capucinus       0.00      0.00      0.00         1
             Chaipod       0.00      0.00      0.00         3
          CocoSavege       0.00      0.00      0.00         4
        DashingLeech       0.00      0.00      0.00         3
         Death_Star_       0.00      0.00      0.00         4
             DejaBoo       0.04      0.10      0.06        10
             DesCo83       0.00      0.00      0.


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
from transformers import RobertaTokenizer, RobertaForSequenceClassification

# Replace the BERT tokenizer with RoBERTa
tokenizer = RobertaTokenizer.from_pretrained("roberta-large")

def tokenize_function(examples):
    return tokenizer(examples, padding="max_length", truncation=True, max_length=128)

# ... (rest of the data preparation code remains the same)

# Replace the BERT model with RoBERTa
model = RobertaForSequenceClassification.from_pretrained("roberta-large", num_labels=len(le.classes_))

# Adjust training arguments for the larger model
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,  # You might need fewer epochs with a more powerful model
    per_device_train_batch_size=8,  # Reduced batch size due to larger model
    per_device_eval_batch_size=16,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    evaluation_strategy="steps",
    eval_steps=500,
    save_steps=1000,
    load_best_model_at_end=True,
    gradient_accumulation_steps=2,  # This effectively doubles the batch size
)

In [2]:
# Convert to pandas DataFrame for easier preprocessing
df = pd.DataFrame(dataset)

# Preprocessing function
def preprocess_text(text):
    # Convert to lowercase
    text = text.lower()
    # Remove special characters and numbers
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    # Tokenize
    tokens = word_tokenize(text)
    # Remove stopwords
    stop_words = set(stopwords.words('english'))
    tokens = [word for word in tokens if word not in stop_words]
    # Join tokens back into string
    return ' '.join(tokens)

# Apply preprocessing
df['processed_text'] = df['content'].apply(preprocess_text)

In [3]:
# Apply preprocessing
df['processed_text'] = df['content']

In [4]:
# Use 'author' as our target for authorship attribution
# Keep only authors with at least 50 comments
author_counts = df['author'].value_counts()
authors_to_keep = author_counts[author_counts >= 8].index
df = df[df['author'].isin(authors_to_keep)]

# Encode author labels
le = LabelEncoder()
df['author_encoded'] = le.fit_transform(df['author'])

# Split the data
train_texts, test_texts, train_labels, test_labels = train_test_split(
    df['processed_text'], df['author_encoded'], test_size=0.2, random_state=42
)

# Further split train into train and validation
train_texts, val_texts, train_labels, val_labels = train_test_split(
    train_texts, train_labels, test_size=0.1, random_state=42
)

# Tokenizer
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

def tokenize_function(examples):
    return tokenizer(examples, padding="max_length", truncation=True, max_length=128)

train_encodings = tokenize_function(train_texts.tolist())
val_encodings = tokenize_function(val_texts.tolist())
test_encodings = tokenize_function(test_texts.tolist())

# Dataset class
class RedditDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

train_dataset = RedditDataset(train_encodings, train_labels.tolist())
val_dataset = RedditDataset(val_encodings, val_labels.tolist())
test_dataset = RedditDataset(test_encodings, test_labels.tolist())

# Model
model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=len(le.classes_))

# Training arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=2,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=64,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    evaluation_strategy="steps",
    eval_steps=500,
    save_steps=1000,
    load_best_model_at_end=True,
)

# Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset
)

# Train the model
trainer.train()

# Evaluate the model
predictions = trainer.predict(test_dataset)
preds = np.argmax(predictions.predictions, axis=-1)

from sklearn.metrics import accuracy_score, classification_report
print(accuracy_score(test_labels, preds))
print(classification_report(test_labels, preds, target_names=le.classes_))

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  2%|▏         | 2/132 [00:15<16:58,  7.83s/it]

: 

In [4]:
from transformers import RobertaTokenizer, RobertaForSequenceClassification

# Replace the BERT tokenizer with RoBERTa
tokenizer = RobertaTokenizer.from_pretrained("roberta-large")

def tokenize_function(examples):
    return tokenizer(examples, padding="max_length", truncation=True, max_length=128)

train_encodings = tokenize_function(train_texts.tolist())
val_encodings = tokenize_function(val_texts.tolist())
test_encodings = tokenize_function(test_texts.tolist())

# Dataset class
class RedditDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

train_dataset = RedditDataset(train_encodings, train_labels.tolist())
val_dataset = RedditDataset(val_encodings, val_labels.tolist())
test_dataset = RedditDataset(test_encodings, test_labels.tolist())

# Replace the BERT model with RoBERTa
model = RobertaForSequenceClassification.from_pretrained("roberta-large", num_labels=len(le.classes_))

# Adjust training arguments for the larger model
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,  # You might need fewer epochs with a more powerful model
    per_device_train_batch_size=8,  # Reduced batch size due to larger model
    per_device_eval_batch_size=16,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    evaluation_strategy="steps",
    eval_steps=500,
    save_steps=1000,
    load_best_model_at_end=True,
    gradient_accumulation_steps=2,  # This effectively doubles the batch size
)

# Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset
)

# Train the model
trainer.train()

from sklearn.metrics import accuracy_score, classification_report

# After making predictions
predictions = trainer.predict(test_dataset)
preds = np.argmax(predictions.predictions, axis=-1)

# Get the unique classes in our test set
unique_classes = np.unique(test_labels)

# Create a mapping from the original label encoder to the classes in our test set
label_map = {i: le.classes_[i] for i in unique_classes}

# Generate the classification report
print(accuracy_score(test_labels, preds))
print(classification_report(test_labels, preds, 
                            target_names=[label_map[i] for i in sorted(label_map.keys())],
                            labels=sorted(label_map.keys())))

# If you want to see which authors are in the test set
print("Authors in test set:")
for i, author in label_map.items():
    print(f"Label {i}: {author}")

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  6%|▌         | 10/174 [03:06<45:49, 16.77s/it] 

{'loss': 4.8226, 'grad_norm': 12.095752716064453, 'learning_rate': 1.0000000000000002e-06, 'epoch': 0.17}


 11%|█▏        | 20/174 [05:49<41:04, 16.00s/it]

{'loss': 4.8732, 'grad_norm': 39.97496032714844, 'learning_rate': 2.0000000000000003e-06, 'epoch': 0.34}


 17%|█▋        | 30/174 [08:27<37:42, 15.71s/it]

{'loss': 4.8165, 'grad_norm': 9.188164710998535, 'learning_rate': 3e-06, 'epoch': 0.51}


 23%|██▎       | 40/174 [11:04<35:05, 15.72s/it]

{'loss': 4.8256, 'grad_norm': 10.712109565734863, 'learning_rate': 4.000000000000001e-06, 'epoch': 0.68}


 29%|██▊       | 50/174 [13:43<32:26, 15.70s/it]

{'loss': 4.8151, 'grad_norm': 15.533021926879883, 'learning_rate': 5e-06, 'epoch': 0.85}


 34%|███▍      | 60/174 [16:17<28:20, 14.92s/it]

{'loss': 4.8367, 'grad_norm': 16.344280242919922, 'learning_rate': 6e-06, 'epoch': 1.03}


 40%|████      | 70/174 [18:56<27:27, 15.84s/it]

{'loss': 4.7927, 'grad_norm': 280.9397888183594, 'learning_rate': 7.000000000000001e-06, 'epoch': 1.2}


 46%|████▌     | 80/174 [21:32<24:25, 15.59s/it]

{'loss': 4.7472, 'grad_norm': 10.857053756713867, 'learning_rate': 8.000000000000001e-06, 'epoch': 1.37}


 52%|█████▏    | 90/174 [24:08<21:43, 15.51s/it]

{'loss': 4.7497, 'grad_norm': 14.414872169494629, 'learning_rate': 9e-06, 'epoch': 1.54}


 57%|█████▋    | 100/174 [26:44<19:08, 15.52s/it]

{'loss': 4.7277, 'grad_norm': 29.80389976501465, 'learning_rate': 1e-05, 'epoch': 1.71}


 63%|██████▎   | 110/174 [29:22<17:01, 15.97s/it]

{'loss': 4.7703, 'grad_norm': 13.208962440490723, 'learning_rate': 1.1000000000000001e-05, 'epoch': 1.88}


 69%|██████▉   | 120/174 [31:58<14:03, 15.61s/it]

{'loss': 4.6662, 'grad_norm': 15.258040428161621, 'learning_rate': 1.2e-05, 'epoch': 2.05}


 75%|███████▍  | 130/174 [34:37<11:47, 16.08s/it]

{'loss': 4.5175, 'grad_norm': 15.18865966796875, 'learning_rate': 1.3000000000000001e-05, 'epoch': 2.22}


 80%|████████  | 140/174 [37:19<09:11, 16.23s/it]

{'loss': 4.5451, 'grad_norm': 17.078166961669922, 'learning_rate': 1.4000000000000001e-05, 'epoch': 2.39}


 86%|████████▌ | 150/174 [39:56<06:14, 15.62s/it]

{'loss': 4.5563, 'grad_norm': 19.71668243408203, 'learning_rate': 1.5e-05, 'epoch': 2.56}


 92%|█████████▏| 160/174 [42:37<03:45, 16.13s/it]

{'loss': 4.4806, 'grad_norm': 43.998023986816406, 'learning_rate': 1.6000000000000003e-05, 'epoch': 2.74}


 98%|█████████▊| 170/174 [45:14<01:03, 15.78s/it]

{'loss': 4.4882, 'grad_norm': 17.25783920288086, 'learning_rate': 1.7000000000000003e-05, 'epoch': 2.91}


100%|██████████| 174/174 [46:16<00:00, 15.96s/it]


{'train_runtime': 2776.6647, 'train_samples_per_second': 1.007, 'train_steps_per_second': 0.063, 'train_loss': 4.7029325989471085, 'epoch': 2.97}


100%|██████████| 19/19 [01:23<00:00,  4.40s/it]

0.11301369863013698
                      precision    recall  f1-score   support

      A_Polite_Noise       0.00      0.00      0.00         3
           Anomander       0.00      0.00      0.00         3
            BZenMojo       0.00      0.00      0.00         1
          Batty-Koda       0.00      0.00      0.00         5
       Blenderhead36       0.00      0.00      0.00         2
   BluepillProfessor       0.00      0.00      0.00         1
   BuildMyPaperHeart       0.00      0.00      0.00         1
           CaspianX2       0.00      0.00      0.00         2
     Cebus_capucinus       0.00      0.00      0.00         1
             Chaipod       0.50      0.33      0.40         3
          CocoSavege       0.00      0.00      0.00         4
        DashingLeech       0.00      0.00      0.00         3
         Death_Star_       0.00      0.00      0.00         4
             DejaBoo       0.18      0.90      0.30        10
             DesCo83       0.00      0.00      0.


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [24]:
import time 
# Fib
def fibonacci_of(n):
    if n in {0, 1}:  # Base case
        return n
    return fibonacci_of(n - 1) + fibonacci_of(n - 2)  # Recursive case

def ultra_fib(n, lookup):
    if n<=2:  # Base case
        lookup.append(0)
        lookup.append(1)
        lookup.append(lookup[0] + lookup[1])
        return lookup[n]
    if n>2:
        lookup.append(ultra_fib(n-1,lookup) + lookup[n-2])
        return lookup[n]
    
def super_ultra_fib(n):
    if n<=2:  # Base case
        lookup = []
        lookup.append(0)
        lookup.append(1)
        lookup.append(lookup[0] + lookup[1])
        return lookup[n]
    if n>2:
        lookup.append(super_ultra_fib(n-1) + lookup[n-2])
        return lookup[n]
    
start = time.time()
print([fibonacci_of(n) for n in range(40)])
end = time.time()
print(f"Iteration: \tTime taken: {(end-start)*10**3:.09f}ms")

start2 = time.time()
print([ultra_fib(n, []) for n in range(40)])
end2 = time.time()
print(f"Iteration: \tTime taken: {(end2-start2)*10**3:.09f}ms")

start3 = time.time()
lookup = []
print([super_ultra_fib(n) for n in range(40)])
end3 = time.time()
print(f"Iteration: \tTime taken: {(end3-start3)*10**3:.09f}ms")

[0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610, 987, 1597, 2584, 4181, 6765, 10946, 17711, 28657, 46368, 75025, 121393, 196418, 317811, 514229, 832040, 1346269, 2178309, 3524578, 5702887, 9227465, 14930352, 24157817, 39088169, 63245986]
Iteration: 	Time taken: 37867.436885834ms
[0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610, 987, 1597, 2584, 4181, 6765, 10946, 17711, 28657, 46368, 75025, 121393, 196418, 317811, 514229, 832040, 1346269, 2178309, 3524578, 5702887, 9227465, 14930352, 24157817, 39088169, 63245986]
Iteration: 	Time taken: 0.000000000ms


UnboundLocalError: local variable 'lookup' referenced before assignment