In [None]:
# Using the transformers_env environment

In [None]:
# Install the `peft` package into the notebook environment.
# NOTE(review): `peft` is not imported anywhere in the visible cells — confirm it is still needed.
pip install peft

In [None]:
import torch

# Sanity-check the GPU setup before running any training cell.
cuda_available = torch.cuda.is_available()
print(cuda_available)  # Should print True
cuda_build_version = torch.version.cuda
print(cuda_build_version)  # Should print 11.8

In [None]:
# Install pinned CUDA 11.8 builds of torch / torchvision / torchaudio from the PyTorch wheel index.
# NOTE(review): this cell comes after a cell that already imports torch; presumably the
# kernel has to be restarted for a newly installed build to be picked up — confirm.
pip install torch==2.0.1+cu118 torchvision==0.15.2+cu118 torchaudio==2.0.2+cu118 --index-url https://download.pytorch.org/whl/cu118

In [None]:
import torch
# Ask PyTorch to release cached CUDA memory that is not currently in use.
torch.cuda.empty_cache()

In [None]:
import torch

# Print the CUDA memory currently allocated and reserved by PyTorch, in MB (bytes / 1024**2).
for label, probe in (
    ("allocated", torch.cuda.memory_allocated),
    ("reserved", torch.cuda.memory_reserved),
):
    print(probe() / 1024**2, f"MB {label}")

model training code

In [None]:
import pandas as pd
import os
import torch
import shutil
import time
import psutil
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, TrainerCallback, EarlyStoppingCallback

def close_file_handles(file_path):
    """Terminate other processes that hold files open at or under ``file_path``.

    Despite the name, no individual handle is closed: every matching process is
    sent ``terminate()`` and waited on for up to 5 seconds.

    Args:
        file_path: File or directory path. A process matches when one of its
            open files is that path or lies underneath it.

    Returns:
        None. This is best effort: every error is printed and swallowed.
    """
    try:
        # Compare normalised paths: callers pass forward-slash paths while the
        # open-file paths are reported in OS-native form.
        target = os.path.normcase(os.path.normpath(file_path))
        # Require a separator after the prefix so "final_new" does not match "final_new2".
        prefix = target if target.endswith(os.sep) else target + os.sep
        own_pid = os.getpid()
        for proc in psutil.process_iter(['pid', 'name', 'open_files']):
            # Never terminate the calling process: it is the one about to write
            # the files, and killing it would abort the save (and the kernel).
            if proc.pid == own_pid:
                continue
            try:
                open_files = proc.info.get('open_files')
                if open_files is None:
                    continue
                for file in open_files:
                    candidate = os.path.normcase(os.path.normpath(file.path))
                    if candidate == target or candidate.startswith(prefix):
                        print(f"Closing handle: {file.path} (PID: {proc.pid}, Name: {proc.info['name']})")
                        proc.terminate()
                        proc.wait(timeout=5)
                        # The process has been dealt with; its remaining files are moot.
                        break
            except Exception as e:
                # Covers psutil.AccessDenied / psutil.NoSuchProcess as well.
                print(f"Skipping process {proc.pid}: {e}")
    except Exception as e:
        print(f"Error closing handles: {e}")
        
# Check disk space
total, used, free = shutil.disk_usage('D:/')
if free < 5 * 1024**3:
    raise RuntimeError(f"Insufficient disk space: {free/1024**3:.2f}GB free, need ~5GB")

# Check write access
output_dir = r'D:/Python/dating coach/dating_coach/dating_coach_gpt2'
# os.access() returns False for a path that does not exist, which would surface as a
# misleading "No write access" error on a first run; create the directory up front.
os.makedirs(output_dir, exist_ok=True)
if not os.access(output_dir, os.W_OK):
    raise RuntimeError("No write access to output directory")

# Set PyTorch memory optimization
# NOTE(review): presumably this only takes effect if set before the CUDA allocator is
# first used in this kernel — confirm.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

# Memory logging callback
class MemoryLoggingCallback(TrainerCallback):
    """Trainer callback that prints the CUDA memory allocated after every training step."""

    def on_step_end(self, args, state, control, **kwargs):
        step = state.global_step
        allocated_mb = torch.cuda.memory_allocated() / 1024**2
        print(f"Step {step}: {allocated_mb:.2f} MB")

# Save checkpoint callback
class SaveCheckpointCallback(TrainerCallback):
    """Write the model and tokenizer into the step's checkpoint folder on every save event.

    Uses the notebook-level ``model`` and ``tokenizer`` variables. Failures are
    printed and swallowed so that a failed extra save does not abort training.
    """

    def on_save(self, args, state, control, **kwargs):
        checkpoint_dir = os.path.join(args.output_dir, f'checkpoint-{state.global_step}')
        try:
            # Follow the run's save_safetensors setting instead of the library default,
            # so this extra save does not add a second copy of the weights in a
            # different format next to the Trainer's own checkpoint files.
            model.save_pretrained(
                checkpoint_dir,
                safe_serialization=getattr(args, 'save_safetensors', True),
            )
            tokenizer.save_pretrained(checkpoint_dir)
            print(f"Saved checkpoint at step {state.global_step}")
        except Exception as e:
            print(f"Checkpoint save failed at step {state.global_step}: {e}")

# Clean text
def clean_text(text):
    """Normalise one transcript string before it is used as training text.

    The value is converted with ``str()`` and stripped. If the result does not
    end in one of ``. , ! ?`` its last whitespace-separated word is dropped
    (presumably to discard a word cut off mid-sentence — confirm). Two known
    phrasing errors are then corrected.

    Args:
        text: Any value; non-strings are stringified first.

    Returns:
        The cleaned string; empty or whitespace-only input yields ``""``.
    """
    text = str(text).strip()
    # Guard the empty case: indexing text[-1] on "" raised IndexError.
    if text and text[-1] not in '.,!?':
        text = ' '.join(text.split()[:-1])
    return text.replace("your doing", "you're doing").replace("mistake guys make", "mistakes guys make")

# Read CSV data
# csv_path may name a single CSV file or a directory; for a directory every *.csv inside is read.
csv_path = r'D:/Python/dating coach/formatted_data.csv'  # Adjust
if os.path.isfile(csv_path):
    csv_files = [csv_path]
else:
    csv_files = [os.path.join(csv_path, name) for name in os.listdir(csv_path) if name.endswith('.csv')]

data = []
for csv_file in csv_files:
    df = pd.read_csv(csv_file)
    # Keep the title as-is and run the text column through clean_text().
    for _, row in df.iterrows():
        data.append({'title': row['title'], 'text': clean_text(row['text'])})

# Format samples
# Each sample is: scenario title, the cleaned text, then a fixed assistant reply that
# only varies by the lower-cased title.
formatted_samples = [
    f"[Scenario]: {entry['title']}\n"
    f"{entry['text']}\n"
    f"[Assistant]: For {entry['title'].lower()}, keep it short and casual, like: 'Hey, great meeting you! Free this weekend?' Avoid long, eager messages as they can seem pushy."
    for entry in data
]

# Optional subsample (disabled):
# if len(formatted_samples) > 500:
#     formatted_samples = formatted_samples[:500]
dataset = Dataset.from_dict({'text': formatted_samples})

# Load tokenizer
# The eos token is reused as the pad token, and the two prompt markers used when
# formatting the samples are registered as additional special tokens.
tokenizer = AutoTokenizer.from_pretrained('gpt2')
tokenizer.pad_token = tokenizer.eos_token
special_tokens = {'additional_special_tokens': ['[Scenario]', '[Assistant]']}
tokenizer.add_special_tokens(special_tokens)

# Load model
model = AutoModelForCausalLM.from_pretrained('gpt2')
# Must run after the special tokens were added so the embedding size matches len(tokenizer).
model.resize_token_embeddings(len(tokenizer))  # Fix lm_head.weight

# Enable gradient checkpointing
model.gradient_checkpointing_enable()

# Clear GPU memory
torch.cuda.empty_cache()

# Tokenize dataset with labels
def tokenize(examples):
    """Tokenize a batch of samples and attach causal-LM labels.

    Args:
        examples: Batch mapping with a ``'text'`` list, as passed by
            ``Dataset.map(..., batched=True)``.

    Returns:
        The tokenizer output (every sequence truncated/padded to 128 tokens)
        with an added ``'labels'`` entry: a copy of ``input_ids`` in which the
        padded positions are replaced by ``-100``.
    """
    tokenized = tokenizer(
        examples['text'],
        truncation=True,
        padding='max_length',
        max_length=128  # Reduced for speed
    )
    # The pad token is the eos token, so copying input_ids verbatim would make the
    # loss cover every padding position. -100 is the label value the loss ignores.
    tokenized['labels'] = [
        [token_id if keep else -100 for token_id, keep in zip(ids, mask)]
        for ids, mask in zip(tokenized['input_ids'], tokenized['attention_mask'])
    ]
    return tokenized
tokenized_dataset = dataset.map(tokenize, batched=True)

# Split dataset
# Shuffle exactly once (with a fixed seed) and slice that single ordering. Two independent
# shuffles gave train and eval different orderings, so rows could land in both splits.
shuffled_dataset = tokenized_dataset.shuffle(seed=42)
train_size = int(0.8 * len(shuffled_dataset))
train_dataset = shuffled_dataset.select(range(train_size))
eval_dataset = shuffled_dataset.select(range(train_size, len(shuffled_dataset)))

# Training arguments
# Per-device batch of 4 with 2 accumulation steps, i.e. 8 samples per optimizer step per device.
# Evaluation and checkpointing both run every 500 steps; the Trainer's own checkpoints
# are written without safetensors (save_safetensors=False).
training_args = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=60,  # Increased
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=2,
    learning_rate=5e-6,  # Lowered
    warmup_steps=100,
    max_grad_norm=1.0,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=100,
    eval_strategy='steps',
    eval_steps=500,
    save_strategy='steps',
    save_steps=500,
    save_total_limit=3,
    save_safetensors=False,
    load_best_model_at_end=True,
    fp16=True,
    gradient_checkpointing=True,
    lr_scheduler_type='linear',
    report_to='none'
)

# Trainer
# Callbacks: per-step memory print, the extra model/tokenizer save on each checkpoint,
# and early stopping with patience 5 and threshold 0.01.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    callbacks=[
        MemoryLoggingCallback(),
        SaveCheckpointCallback(),
        EarlyStoppingCallback(early_stopping_patience=5, early_stopping_threshold=0.01)
    ]
)

# Train
final_dir = os.path.join(output_dir, 'final_new')
try:
    trainer.train()
except Exception as e:
    print(f"Training failed: {e}")
    # Best effort: keep whatever weights exist before re-raising the training error.
    try:
        close_file_handles(final_dir)
        # save_pretrained() has no `save_safetensors` argument; the switch for the
        # non-safetensors format is `safe_serialization=False`.
        model.save_pretrained(final_dir, safe_serialization=False)
        tokenizer.save_pretrained(final_dir)
    except Exception as save_e:
        print(f"Final save failed: {save_e}")
    raise

# Save model and tokenizer
max_attempts = 3
for attempt in range(max_attempts):
    try:
        close_file_handles(final_dir)
        model.save_pretrained(final_dir, safe_serialization=False)
        tokenizer.save_pretrained(final_dir)
        print("Final model saved to final_new")
        break
    except Exception as e:
        print(f"Final save attempt {attempt+1} failed: {e}")
        if attempt < max_attempts - 1:
            # Only wait when another attempt follows.
            time.sleep(5)
        else:
            print("Final save failed after 3 attempts")

In [None]:

# Tokenize dataset with labels
def tokenize(examples):
    """Tokenize a batch of samples and attach causal-LM labels.

    Args:
        examples: Batch mapping with a ``'text'`` list, as passed by
            ``Dataset.map(..., batched=True)``.

    Returns:
        The tokenizer output (every sequence truncated/padded to 128 tokens)
        with an added ``'labels'`` entry: a copy of ``input_ids`` in which the
        padded positions are replaced by ``-100``.
    """
    tokenized = tokenizer(
        examples['text'],
        truncation=True,
        padding='max_length',
        max_length=128  # Reduced for speed
    )
    # Padding positions (attention_mask == 0) must not contribute to the loss;
    # -100 is the label value the loss ignores.
    tokenized['labels'] = [
        [token_id if keep else -100 for token_id, keep in zip(ids, mask)]
        for ids, mask in zip(tokenized['input_ids'], tokenized['attention_mask'])
    ]
    return tokenized

tokenized_dataset = dataset.map(tokenize, batched=True)

# Split dataset
# One seeded shuffle, sliced into disjoint 80/20 parts. Shuffling separately for each
# split let the same rows appear in both train and eval.
shuffled_dataset = tokenized_dataset.shuffle(seed=42)
train_size = int(0.8 * len(shuffled_dataset))
train_dataset = shuffled_dataset.select(range(train_size))
eval_dataset = shuffled_dataset.select(range(train_size, len(shuffled_dataset)))

# Training arguments
# Same configuration as the main training cell except for the epoch count, the
# learning rate and the early-stopping patience.
training_args = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=30,  # 30 here; the main training cell uses 60
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=2,
    learning_rate=1e-5,  # 1e-5 here; the main training cell uses 5e-6
    warmup_steps=100,
    max_grad_norm=1.0,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=100,
    eval_strategy='steps',
    eval_steps=500,
    save_strategy='steps',
    save_steps=500,
    save_total_limit=3,
    save_safetensors=False,
    load_best_model_at_end=True,
    fp16=True,
    gradient_checkpointing=True,
    lr_scheduler_type='linear',
    report_to='none'
)

# Trainer
# Reuses the `model` object already in the kernel; early stopping uses patience 10 here.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    callbacks=[
        MemoryLoggingCallback(),
        SaveCheckpointCallback(),
        EarlyStoppingCallback(early_stopping_patience=10, early_stopping_threshold=0.01)
    ]
)

# Train
final_dir = os.path.join(output_dir, 'final_new')
try:
    trainer.train()
except Exception as e:
    print(f"Training failed: {e}")
    # Best effort: keep whatever weights exist before re-raising the training error.
    try:
        close_file_handles(final_dir)
        # `safe_serialization` is the save_pretrained() switch for the weight format;
        # `save_safetensors` is a TrainingArguments field, not an argument here.
        model.save_pretrained(final_dir, safe_serialization=False)
        tokenizer.save_pretrained(final_dir)
    except Exception as save_e:
        print(f"Final save failed: {save_e}")
    raise

# Save model and tokenizer
max_attempts = 3
for attempt in range(max_attempts):
    try:
        close_file_handles(final_dir)
        model.save_pretrained(final_dir, safe_serialization=False)
        tokenizer.save_pretrained(final_dir)
        print("Final model saved to final_new")
        break
    except Exception as e:
        print(f"Final save attempt {attempt+1} failed: {e}")
        if attempt < max_attempts - 1:
            # Only wait when another attempt follows.
            time.sleep(5)
        else:
            print("Final save failed after 3 attempts")

In [None]:
# Display the raw (untokenized) dataset object built from the formatted samples.
dataset

In [None]:
# Calculate total token count
# `dataset` only has the raw 'text' column; the token ids live in `tokenized_dataset`.
# Every row is padded to max_length, so this count includes padding tokens.
total_tokens = sum(len(input_ids) for input_ids in tokenized_dataset['input_ids'])
print(f"Total tokens: {total_tokens}")

Code to resume interrupted training

In [None]:
import pandas as pd
import os
import torch
import shutil
import psutil
import time
import numpy
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, TrainerCallback, EarlyStoppingCallback
# Override torch.load for trusted checkpoints
# SECURITY: weights_only=False allows arbitrary pickled objects to be loaded, which can
# execute code. Only resume from checkpoint files this project wrote itself.
# The marker attribute keeps a re-run of this cell from wrapping the wrapper again.
if not getattr(torch.load, "_forces_full_unpickle", False):
    original_torch_load = torch.load

    def custom_torch_load(*args, **kwargs):
        """Call the original ``torch.load`` with ``weights_only`` forced to ``False``."""
        kwargs['weights_only'] = False  # Disable weights_only for trusted checkpoint
        return original_torch_load(*args, **kwargs)

    custom_torch_load._forces_full_unpickle = True
    torch.load = custom_torch_load
# Close open file handles
def close_file_handles(file_path):
    """Terminate other processes that hold files open at or under ``file_path``.

    Despite the name, no individual handle is closed: every matching process is
    sent ``terminate()`` and waited on for up to 5 seconds.

    Args:
        file_path: File or directory path. A process matches when one of its
            open files is that path or lies underneath it.

    Returns:
        None. This is best effort: every error is printed and swallowed.
    """
    try:
        # Compare normalised paths: callers pass forward-slash paths while the
        # open-file paths are reported in OS-native form.
        target = os.path.normcase(os.path.normpath(file_path))
        # Require a separator after the prefix so "checkpoint-50" does not match "checkpoint-500".
        prefix = target if target.endswith(os.sep) else target + os.sep
        own_pid = os.getpid()
        for proc in psutil.process_iter(['pid', 'name', 'open_files']):
            # Never terminate the calling process: it is the one about to write
            # the files, and killing it would abort the save (and the kernel).
            if proc.pid == own_pid:
                continue
            try:
                open_files = proc.info.get('open_files')
                if open_files is None:
                    continue
                for file in open_files:
                    candidate = os.path.normcase(os.path.normpath(file.path))
                    if candidate == target or candidate.startswith(prefix):
                        print(f"Closing handle: {file.path} (PID: {proc.pid}, Name: {proc.info['name']})")
                        proc.terminate()
                        proc.wait(timeout=5)
                        # The process has been dealt with; its remaining files are moot.
                        break
            except Exception as e:
                # Covers psutil.AccessDenied / psutil.NoSuchProcess as well.
                print(f"Skipping process {proc.pid}: {e}")
    except Exception as e:
        print(f"Error closing handles: {e}")

# Check disk space
total, used, free = shutil.disk_usage('D:/')
if free < 10 * 1024**3:
    raise RuntimeError(f"Insufficient disk space: {free/1024**3:.2f}GB free, need ~10GB")

# Check write access
output_dir = r'D:/Python/dating coach/dating_coach/dating_coach_gpt2'
if not os.access(output_dir, os.W_OK):
    raise RuntimeError("No write access to output directory")

# Find latest checkpoint
# Only directories named exactly "checkpoint-<integer>" qualify; a stray file or a name
# such as "checkpoint-old" would otherwise crash the int() conversion below.
checkpoint_prefix = 'checkpoint-'
checkpoints = [
    d for d in os.listdir(output_dir)
    if d.startswith(checkpoint_prefix)
    and d[len(checkpoint_prefix):].isdigit()
    and os.path.isdir(os.path.join(output_dir, d))
]
if not checkpoints:
    raise RuntimeError("No checkpoints found. Must restart training.")
latest_checkpoint = max(checkpoints, key=lambda x: int(x[len(checkpoint_prefix):]))
checkpoint_path = os.path.join(output_dir, latest_checkpoint)
print(f"Resuming from: {checkpoint_path}")

# Set PyTorch memory optimization
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

# Memory logging callback
class MemoryLoggingCallback(TrainerCallback):
    """Trainer callback that prints the CUDA memory allocated after every training step."""

    def on_step_end(self, args, state, control, **kwargs):
        step = state.global_step
        allocated_mb = torch.cuda.memory_allocated() / 1024**2
        print(f"Step {step}: {allocated_mb:.2f} MB")

# Save checkpoint callback
class SaveCheckpointCallback(TrainerCallback):
    """Write the model and tokenizer into the step's checkpoint folder on every save event.

    Uses the notebook-level ``model`` and ``tokenizer`` variables and retries up
    to three times. Failures are printed and swallowed so that a failed extra
    save does not abort training.
    """

    def on_save(self, args, state, control, **kwargs):
        checkpoint_dir = os.path.join(args.output_dir, f'checkpoint-{state.global_step}')
        max_attempts = 3
        for attempt in range(max_attempts):
            try:
                close_file_handles(checkpoint_dir)
                # save_pretrained() has no `save_safetensors` argument; the switch for
                # the non-safetensors format is `safe_serialization=False`.
                model.save_pretrained(checkpoint_dir, safe_serialization=False)
                tokenizer.save_pretrained(checkpoint_dir)
                print(f"Saved checkpoint at step {state.global_step}")
                break
            except Exception as e:
                print(f"Checkpoint save attempt {attempt+1} failed: {e}")
                if attempt < max_attempts - 1:
                    # Only wait when another attempt follows.
                    time.sleep(5)
                else:
                    print(f"Checkpoint save failed at step {state.global_step}")
# Clean text
def clean_text(text):
    """Normalise one transcript string before it is used as training text.

    The value is converted with ``str()`` and stripped. If the result does not
    end in one of ``. , ! ?`` its last whitespace-separated word is dropped
    (presumably to discard a word cut off mid-sentence — confirm). Two known
    phrasing errors are then corrected.

    Args:
        text: Any value; non-strings are stringified first.

    Returns:
        The cleaned string; empty or whitespace-only input yields ``""``.
    """
    text = str(text).strip()
    # Guard the empty case: indexing text[-1] on "" raised IndexError.
    if text and text[-1] not in '.,!?':
        text = ' '.join(text.split()[:-1])
    return text.replace("your doing", "you're doing").replace("mistake guys make", "mistakes guys make")

# Read CSV data
# csv_path may name a single CSV file or a directory; for a directory every *.csv inside is read.
csv_path = r'D:/Python/dating coach/formatted_data.csv'  # Adjust
if os.path.isfile(csv_path):
    csv_files = [csv_path]
else:
    csv_files = [os.path.join(csv_path, name) for name in os.listdir(csv_path) if name.endswith('.csv')]

data = []
for csv_file in csv_files:
    df = pd.read_csv(csv_file)
    # Keep the title as-is and run the text column through clean_text().
    for _, row in df.iterrows():
        data.append({'title': row['title'], 'text': clean_text(row['text'])})

# Format samples with varied responses
import random

# Canned assistant replies; one is picked at random for every sample.
responses = [
    "Try a concise approach: 'Hey, loved our chat! Free for coffee this weekend?' Avoid over-texting.",
    "Wait 1–2 days, then send a light message like: 'Hey, how’s it going?' Don’t sound too eager.",
    "Keep it playful: 'Hey, still thinking about that great convo! Up for a drink?' Avoid long texts."
]

# Each sample is: scenario title, the cleaned text, then "[Assistant]: For <lower-cased title>, <reply>".
formatted_samples = [
    f"[Scenario]: {entry['title']}\n"
    f"{entry['text']}\n"
    f"[Assistant]: For {entry['title'].lower()}, {random.choice(responses)}"
    for entry in data
]

# Use full dataset
dataset = Dataset.from_dict({'text': formatted_samples})

# Load tokenizer
# Both tokenizer and model are read from the checkpoint directory selected above;
# a load failure is printed and then re-raised so the cell stops.
try:
    tokenizer = AutoTokenizer.from_pretrained(checkpoint_path)
    # Reuse the eos token as the pad token.
    tokenizer.pad_token = tokenizer.eos_token
except Exception as e:
    print(f"Tokenizer load failed: {e}")
    raise

# Load model from checkpoint
try:
    model = AutoModelForCausalLM.from_pretrained(checkpoint_path)
    model.gradient_checkpointing_enable()
except Exception as e:
    print(f"Model load failed: {e}")
    raise

# Clear cached GPU memory before tokenizing and training.
torch.cuda.empty_cache()

# Tokenize dataset
def tokenize(examples):
    """Tokenize a batch of samples and attach causal-LM labels.

    Args:
        examples: Batch mapping with a ``'text'`` list, as passed by
            ``Dataset.map(..., batched=True)``.

    Returns:
        The tokenizer output (every sequence truncated/padded to 128 tokens)
        with an added ``'labels'`` entry: a copy of ``input_ids`` in which the
        padded positions are replaced by ``-100``.
    """
    tokenized = tokenizer(
        examples['text'],
        truncation=True,
        padding='max_length',
        max_length=128
    )
    # The pad token is the eos token, so copying input_ids verbatim would make the
    # loss cover every padding position. -100 is the label value the loss ignores.
    tokenized['labels'] = [
        [token_id if keep else -100 for token_id, keep in zip(ids, mask)]
        for ids, mask in zip(tokenized['input_ids'], tokenized['attention_mask'])
    ]
    return tokenized
tokenized_dataset = dataset.map(tokenize, batched=True)

# Split dataset
# One seeded shuffle, sliced into disjoint 80/20 parts. Shuffling separately for each
# split let the same rows appear in both train and eval.
shuffled_dataset = tokenized_dataset.shuffle(seed=42)
train_size = int(0.8 * len(shuffled_dataset))
train_dataset = shuffled_dataset.select(range(train_size))
eval_dataset = shuffled_dataset.select(range(train_size, len(shuffled_dataset)))

# Training arguments
# Per-device batch of 4 with 2 accumulation steps, i.e. 8 samples per optimizer step per device.
# Evaluation and checkpointing both run every 500 steps.
# NOTE(review): presumably these values should match the interrupted run for the resume
# to continue its schedule consistently — confirm.
training_args = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=30,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=2,
    learning_rate=1e-5,
    warmup_steps=100,
    max_grad_norm=1.0,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=100,
    eval_strategy='steps',
    eval_steps=500,
    save_strategy='steps',
    save_steps=500,
    save_total_limit=3,
    save_safetensors=False,
    load_best_model_at_end=True,
    fp16=True,
    gradient_checkpointing=True,
    lr_scheduler_type='linear',
    report_to='none'
)

# Trainer
# Callbacks: per-step memory print, the extra model/tokenizer save on each checkpoint,
# and early stopping with patience 5 and threshold 0.01.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    callbacks=[
        MemoryLoggingCallback(),
        SaveCheckpointCallback(),
        EarlyStoppingCallback(early_stopping_patience=5, early_stopping_threshold=0.01)
    ]
)

# Resume training
final_dir = os.path.join(output_dir, 'final_new')
try:
    trainer.train(resume_from_checkpoint=checkpoint_path)
except Exception as e:
    print(f"Training failed: {e}")
    # Best effort: keep whatever weights exist before re-raising the training error.
    try:
        close_file_handles(final_dir)
        # save_pretrained() has no `save_safetensors` argument; the switch for the
        # non-safetensors format is `safe_serialization=False`.
        model.save_pretrained(final_dir, safe_serialization=False)
        tokenizer.save_pretrained(final_dir)
    except Exception as save_e:
        print(f"Final save failed: {save_e}")
    raise

# Save final model
max_attempts = 3
for attempt in range(max_attempts):
    try:
        close_file_handles(final_dir)
        model.save_pretrained(final_dir, safe_serialization=False)
        tokenizer.save_pretrained(final_dir)
        print("Final model saved to final_new")
        break
    except Exception as e:
        print(f"Final save attempt {attempt+1} failed: {e}")
        if attempt < max_attempts - 1:
            # Only wait when another attempt follows.
            time.sleep(5)
        else:
            print("Final save failed after 3 attempts")

Inference code

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

import re  # needed for the tag clean-up below; this cell's import list lacked it

# Set device
device = torch.device('cuda:0')
# Clear GPU memory
torch.cuda.empty_cache()

# Load tokenizer
# A load failure is re-raised: exit() does not stop the rest of a notebook cell, so the
# code below would otherwise run with a missing tokenizer/model.
try:
    tokenizer = AutoTokenizer.from_pretrained(r'D:/Python/dating coach/dating_coach/dating_coach_gpt2/final_new')
except Exception as e:
    print(f"Tokenizer load failed: {e}")
    raise

# Load model
try:
    model = AutoModelForCausalLM.from_pretrained(r'D:/Python/dating coach/dating_coach/dating_coach_gpt2/final_new')
    model.to(device)  # Move model to GPU
    model.eval()  # Set to evaluation mode
except Exception as e:
    print(f"Model load failed: {e}")
    raise

# Query
query = "User: What advice do you have for mistakes guys make after getting her number? [Scenario]: Mistakes Guys Make After Getting Her Number"

# Tokenize and move inputs to GPU
try:
    inputs = tokenizer(query, return_tensors='pt', truncation=True, max_length=128)
    inputs = {k: v.to(device) for k, v in inputs.items()}  # Move all tensors to cuda:0

    # # Verify device
    # for k, v in inputs.items():
    #     print(f"{k} device: {v.device}")
    # print(f"Model device: {next(model.parameters()).device}")

    # Generate
    with torch.no_grad():  # Disable gradient computation
        outputs = model.generate(
            **inputs,
            max_new_tokens=150,
            do_sample=True,
            top_p=0.95,
            temperature=1,
            pad_token_id=tokenizer.eos_token_id,
            top_k=50,
            repetition_penalty=2,
            # `stop_sequences` is not a generate() argument; string-based stopping is
            # `stop_strings`, which also needs the tokenizer passed in.
            stop_strings=['[Assistant]', '.'],
            tokenizer=tokenizer
        )

    # Decode
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    cleaned_response = re.sub(r'\[.*?\]', '', response).strip()
    print(f"Response: {cleaned_response}")
except Exception as e:
    print(f"Inference failed: {e}")

In [None]:
import torch
import re
from transformers import AutoTokenizer, AutoModelForCausalLM

def clean_response(text):
    """Strip markup from generated text and return it as complete sentences.

    Steps: remove ``[...]`` tags and ``<|endoftext|>``, split into sentences,
    make sure the last sentence ends with punctuation, and append a generic
    closing sentence when none of the words 'avoid', 'suggest', 'try' or
    'action' occurs in the text.

    Args:
        text: Decoded model output.

    Returns:
        The cleaned sentences joined by single spaces.
    """
    # Remove special tokens and fix framing
    text = re.sub(r'\[.*?\]', '', text).strip()  # Remove [Scenario], [Assistant]
    text = re.sub(r'<\|endoftext\|>', '', text).strip()
    # Split after each period but keep it: splitting on '. ' and re-joining with ' '
    # silently dropped every period except the last one.
    sentences = [s.strip() for s in re.split(r'(?<=\.)\s+', text) if s.strip()]
    # Only add a period when the text does not already end a sentence ("Really?" stays as is).
    if sentences and not sentences[-1].endswith(('.', '!', '?')):
        sentences[-1] += '.'
    # Ensure solution-focused
    if not any(word in text.lower() for word in ['avoid', 'suggest', 'try', 'action']):
        sentences.append("To address this, focus on clear communication and positive engagement.")
    return ' '.join(sentences)

device = torch.device('cuda:0')
torch.cuda.empty_cache()

# Load model and tokenizer
model_path = 'D:/Python/dating coach/dating_coach/dating_coach_gpt2/final_new'  # Adjust
try:
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForCausalLM.from_pretrained(model_path)
    model.to(device)
    model.eval()
except Exception as e:
    print(f"Load failed: {e}")
    # Re-raise: exit() does not stop the rest of a notebook cell.
    raise

# Detailed prompt
query = (
    '''Mistakes Guys Make After Getting female number and provide Common mistakes include texting too much too soon, being overly eager, or not following up quickly enough.
    Provide a detailed, well-structured response with practical advice to address the scenario. Explain how to avoid each mistake and suggest specific actions. Ensure the response is positive, concise, and ends with a complete sentence.'''
)

try:
    inputs = tokenizer(query, return_tensors='pt', truncation=True, max_length=128)
    inputs = {k: v.to(device) for k, v in inputs.items()}
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=150,  # Longer output
            do_sample=True,
            top_k=50,
            top_p=0.95,  # Diverse
            temperature=1.0,  # Creative
            repetition_penalty=2.0,  # Avoid repetition
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id  # Stop at end
        )
    # The returned sequence starts with the prompt tokens. Decode only the continuation:
    # the prompt itself contains "avoid"/"suggest", which made the keyword check in
    # clean_response() pass regardless of what the model generated.
    prompt_length = inputs['input_ids'].shape[1]
    response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=False)
    cleaned_response = clean_response(response)
    print(f"Response: {cleaned_response}")
except Exception as e:
    print(f"Inference failed: {e}")

In [None]:
%%writefile app.py
import torch
import re
from transformers import AutoTokenizer, AutoModelForCausalLM

def clean_response(text):
    """Strip markup and off-topic filler from generated text and return complete sentences.

    Steps: remove ``[...]`` tags and ``<|endoftext|>``, delete a fixed list of
    filler phrases, split into sentences, make sure the last sentence ends with
    punctuation, and append canned advice when none of the scenario phrases
    occurs in the text.

    Args:
        text: Decoded model output.

    Returns:
        The cleaned sentences joined by single spaces.
    """
    # Remove special tokens and irrelevant phrases
    text = re.sub(r'\[.*?\]', '', text).strip()  # Remove [Scenario], [Assistant]
    text = re.sub(r'<\|endoftext\|>', '', text).strip()
    # Remove off-topic phrases (e.g., "bhaiya", "good night")
    irrelevant = r'\b(bhaiya|yaar|bro|good night|welcome back|thank god|complaints)\b'
    text = re.sub(irrelevant, '', text, flags=re.IGNORECASE).strip()
    # Split after each period but keep it: splitting on '. ' and re-joining with ' '
    # silently dropped every period except the last one.
    sentences = [s.strip() for s in re.split(r'(?<=\.)\s+', text) if s.strip()]
    # Only add a period when the text does not already end a sentence.
    if sentences and not sentences[-1].endswith(('.', '!', '?')):
        sentences[-1] += '.'
    # Ensure scenario-specific advice
    required_phrases = ['texting too much', 'overly eager', 'following up']
    if not any(phrase in text.lower() for phrase in required_phrases):
        sentences.append(
            "To avoid texting too much, send one casual message within 24 hours. "
            "Prevent being overly eager by matching her texting pace. "
            "Follow up within 1–2 days to show interest."
        )
    return ' '.join(sentences)

device = torch.device('cuda:0')
torch.cuda.empty_cache()

# Load model and tokenizer
model_path = 'D:/Python/dating coach/dating_coach/dating_coach_gpt2/final_new'  # Adjust
try:
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForCausalLM.from_pretrained(model_path)
    model.to(device)
    model.eval()
except Exception as e:
    print(f"Load failed: {e}")
    # Same exit status as before, without relying on the interactive `exit` helper.
    raise SystemExit(1)

# Stricter prompt
query = (
    '''Mistakes Guys Make After Getting female number and provide Common mistakes include texting too much too soon, being overly eager, or not following up quickly enough.
    Provide a detailed, well-structured response with practical advice to address the scenario. Explain how to avoid each mistake and suggest specific actions. Ensure the response is positive, concise, and ends with a complete sentence.'''
)

try:
    inputs = tokenizer(query, return_tensors='pt', truncation=True, max_length=128)
    inputs = {k: v.to(device) for k, v in inputs.items()}
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=200,  # Allow more detail
            do_sample=True,
            top_k=50,
            top_p=0.95,
            temperature=0.9,  # Less rambling
            repetition_penalty=2.0,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id
        )
    # The returned sequence starts with the prompt tokens. Decode only the continuation:
    # the prompt already contains "texting too much" and "overly eager", so the
    # scenario-phrase fallback in clean_response() could never trigger.
    prompt_length = inputs['input_ids'].shape[1]
    response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=False)
    cleaned_response = clean_response(response)
    print(f"Response: {cleaned_response}")
except Exception as e:
    print(f"Inference failed: {e}")

In [None]:
from fastapi import FastAPI
import torch
import re
from transformers import AutoTokenizer, AutoModelForCausalLM

def clean_response(text):
    """Turn generated text into a bulleted answer covering the three scenario mistakes.

    Steps: remove ``[...]`` tags, ``<|endoftext|>`` and a fixed list of filler
    phrases; split into sentences; append a canned bullet for every scenario
    phrase ('texting too much', 'overly eager', 'following up') missing from
    the text; format as ``- `` bullets followed by a fixed closing line.

    Args:
        text: Decoded model output.

    Returns:
        One bullet per line, then 'These steps foster a positive connection.'.
    """
    text = re.sub(r'\[.*?\]', '', text).strip()
    text = re.sub(r'<\|endoftext\|>', '', text).strip()
    irrelevant = r'\b(bhaiya|yaar|bro|good night|welcome back|thank god|complaints|subscribe|social media|channel|followers|mentorship)\b'
    text = re.sub(irrelevant, '', text, flags=re.IGNORECASE).strip()
    # Split after each period but keep it, so every bullet retains its own punctuation.
    sentences = [s.strip() for s in re.split(r'(?<=\.)\s+', text) if s.strip()]
    if sentences and not sentences[-1].endswith(('.', '!', '?')):
        sentences[-1] += '.'
    required_phrases = ['texting too much', 'overly eager', 'following up']
    response_lower = text.lower()
    missing_phrases = [phrase for phrase in required_phrases if phrase not in response_lower]
    if 'texting too much' in missing_phrases:
        sentences.append("To avoid texting too much, send one casual message within 24 hours, like: 'Hey, great meeting you! Free for coffee?'")
    if 'overly eager' in missing_phrases:
        sentences.append("Prevent being overly eager by matching her texting pace with one text daily, such as: 'Hey, loved our chat! What’s up?'")
    if 'following up' in missing_phrases:
        sentences.append("Follow up within 1–2 days to show interest, e.g., 'Hey, how’s it going? Had fun talking!'")
    # A former substitution of "negative" words with 'positive' was removed: its result
    # was assigned to `text` after the sentences were built and never used, so it had
    # no effect on the returned value.
    return '\n'.join([f"- {s}" for s in sentences if s]) + '\nThese steps foster a positive connection.'

app = FastAPI()
device = torch.device('cuda:0')

# Load the fine-tuned tokenizer and model once at start-up and keep the model in eval mode.
final_model_dir = r'D:/Python/dating_coach/d_coach/dating_coach_gpt2/final_new'
try:
    tokenizer = AutoTokenizer.from_pretrained(final_model_dir)
    model = AutoModelForCausalLM.from_pretrained(final_model_dir)
    model.to(device)
    model.eval()
except Exception as e:
    print(f"Model load failed: {e}")
    # Re-raise instead of exit(1): exit() does not stop the rest of a notebook cell, and
    # the route below would otherwise be registered without a usable model.
    raise

@app.post("/chat")
async def chat(query: str):
    """Generate a cleaned, bulleted answer for ``query``.

    Args:
        query: The user's question or scenario text.

    Returns:
        ``{"response": <cleaned text>}`` on success, or ``{"error": <message>}``
        when anything in tokenization, generation or clean-up raises.
    """
    try:
        prompt = (
            f"{query}\n"
            f"Provide a detailed, well-structured response addressing each mistake. Use bullet points for clarity. For each mistake: explain how to avoid it, suggest a specific action (e.g., example text message), and maintain a positive tone. Ensure the response is concise, ends with a complete sentence, and avoids irrelevant topics like social media or unrelated activities."
        )
        inputs = tokenizer(prompt, return_tensors='pt', truncation=True, max_length=128)
        inputs = {k: v.to(device) for k, v in inputs.items()}
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=200,
                do_sample=True,
                top_k=50,
                top_p=0.95,
                temperature=0.8,
                repetition_penalty=2.0,
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id
            )
        # The returned sequence starts with the prompt tokens; decode only what was
        # generated so the instruction text is not echoed back to the client.
        prompt_length = inputs['input_ids'].shape[1]
        response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=False)
        cleaned_response = clean_response(response)
        return {"response": cleaned_response}
    except Exception as e:
        return {"error": str(e)}

```python
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import torch
import re
from transformers import AutoTokenizer, AutoModelForCausalLM
import pandas as pd
import os

def clean_response(text, query):
    """Turn generated text into a bulleted answer covering the three scenario mistakes.

    Steps: remove every case-insensitive occurrence of ``query``; remove
    ``[...]`` tags, ``<|endoftext|>`` and a fixed list of filler phrases; split
    into sentences. If any scenario phrase ('texting too much', 'overly eager',
    'following up') is missing, or nothing is left, the sentences are replaced
    by canned bullets.

    Args:
        text: Decoded model output.
        query: The user query to strip from ``text``.

    Returns:
        One bullet per line, then 'These actions foster a positive connection.'.
    """
    text = re.sub(re.escape(query), '', text, flags=re.IGNORECASE).strip()
    text = re.sub(r'\[.*?\]', '', text).strip()
    text = re.sub(r'<\|endoftext\|>', '', text).strip()
    irrelevant = r'\b(bhaiya|yaar|bro|good night|welcome back|thank god|complaints|subscribe|social media|channel|followers|mentorship|geekhoopermusic|eyeembrace|blueprint|clicking here|online dating)\b'
    text = re.sub(irrelevant, '', text, flags=re.IGNORECASE).strip()
    # Split after each period but keep it, so every bullet retains its own punctuation.
    sentences = [s.strip() for s in re.split(r'(?<=\.)\s+', text) if s.strip()]
    if sentences and not sentences[-1].endswith(('.', '!', '?')):
        sentences[-1] += '.'
    required_phrases = ['texting too much', 'overly eager', 'following up']
    response_lower = text.lower()
    missing_phrases = [phrase for phrase in required_phrases if phrase not in response_lower]
    if missing_phrases or not sentences:
        no_output = not sentences
        additions = []
        if 'texting too much' in missing_phrases or no_output:
            additions.append("To avoid texting too much, send one casual message within 24 hours, like: 'Hey, great meeting you! Free for coffee?'")
        if 'overly eager' in missing_phrases or no_output:
            additions.append("Prevent being overly eager by matching her texting pace with one text daily, such as: 'Hey, loved our chat! What’s up?'")
        if 'following up' in missing_phrases or no_output:
            additions.append("Follow up within 1–2 days to show interest, e.g., 'Hey, how’s it going? Had fun talking!'")
        # NOTE(review): this REPLACES the model's sentences with the canned bullets (it
        # does not extend them), so any generated text is discarded as soon as one
        # phrase is missing — confirm that is intended.
        sentences = additions
    # A former substitution of "negative" words with 'positive' was removed: its result
    # was assigned to `text` after the sentences were built and never used, so it had
    # no effect on the returned value.
    return '\n'.join([f"- {s}" for s in sentences if s]) + '\nThese actions foster a positive connection.'

device = torch.device('cuda:0')
model_path = 'D:/Python/dating_coach/d_coach/dating_coach_gpt2/final_new'
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path).to(device).eval()

# Load dataset
data = []
csv_path = r'D:/Python/dating_coach/d_coach/data.csv'
if os.path.isfile(csv_path):
    df = pd.read_csv(csv_path)
    data = [{'title': row['title'], 'text': str(row['text'])} for _, row in df.iterrows()]
documents = [f"{item['title']}: {item['text']}" for item in data]
# Fail with a clear message instead of an opaque shape error further down when the
# CSV is missing or empty.
if not documents:
    raise RuntimeError(f"No documents loaded from {csv_path}")

# Embed documents
embedder = SentenceTransformer('all-MiniLM-L6-v2')
embeddings = embedder.encode(documents, convert_to_numpy=True)
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

# Query
query = "Mistakes Guys Make After Getting Her Number\nCommon mistakes include texting too much too soon, being overly eager, or not following up quickly enough."
query_embedding = embedder.encode([query])
# Never ask for more neighbours than there are documents, and skip the -1 ids that
# mark "no result", which would otherwise index documents[-1].
D, I = index.search(query_embedding, k=min(3, len(documents)))
context = "\n".join([documents[i] for i in I[0] if i >= 0])

# Generate
# NOTE(review): the context comes first and the prompt is truncated to 256 tokens, so a
# long context presumably pushes the query and instructions out of the prompt — confirm.
prompt = (
    f"Context: {context}\n"
    f"{query}\n"
    f"Respond with a concise, positive, and well-structured answer using bullet points. Address each mistake (texting too much, being overly eager, not following up) with an explanation of how to avoid it and a specific action (e.g., example text message). Avoid irrelevant topics like social media or unrelated activities."
)
inputs = tokenizer(prompt, return_tensors='pt', truncation=True, max_length=256).to(device)
with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=150,
        do_sample=True,
        top_k=50,
        top_p=0.95,
        temperature=0.6,
        repetition_penalty=2.0,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=tokenizer.eos_token_id
    )
# The returned sequence starts with the prompt tokens; decode only the continuation so
# the retrieved context and the instructions are not treated as the model's answer.
prompt_length = inputs['input_ids'].shape[1]
response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=False)
cleaned_response = clean_response(response, query)
print(f"Response:\n{cleaned_response}")
```

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
# Load tokenizer and model from the SAME identifier. The tokenizer used to come from
# `model_path`, a variable left over from another cell, which could point at a
# different model than the one loaded here.
mistral_model_id = "mistralai/Mistral-7B-Instruct-v0.1"
tokenizer = AutoTokenizer.from_pretrained(mistral_model_id, local_files_only=True)
model = AutoModelForCausalLM.from_pretrained(
    mistral_model_id,
    local_files_only=True
)


In [None]:
import torch
import psutil
from transformers import AutoTokenizer, AutoModelForCausalLM

from transformers import BitsAndBytesConfig

# Report free system RAM before the load so it can be compared with the value afterwards.
mem = psutil.virtual_memory()
print(f"Before loading: {mem.available / (1024 ** 3):.2f} GB free")

# 8-bit quantized load; device placement is left to device_map="auto" and only
# locally available files are used for the model.
bnb_config = BitsAndBytesConfig(load_in_8bit=True)
model_path = "mistralai/Mistral-7B-Instruct-v0.1"  # change model path here
load_options = dict(
    quantization_config=bnb_config,  # if supported
    device_map="auto",
    local_files_only=True,
)
model = AutoModelForCausalLM.from_pretrained(model_path, **load_options)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Report free system RAM again once loading has finished.
mem = psutil.virtual_memory()
print(f"After loading: {mem.available / (1024 ** 3):.2f} GB free")


In [None]:
import psutil
import threading
import time
from transformers import BitsAndBytesConfig, AutoModelForCausalLM, AutoTokenizer

def resource_monitor(ram_threshold=90, cpu_threshold=90):
    """Poll RAM and CPU usage forever and abort the main thread when a threshold is hit.

    Intended to run in a background thread. Each round measures CPU over one
    second, then sleeps two more seconds.

    Args:
        ram_threshold: RAM usage percentage at or above which the run is aborted.
        cpu_threshold: CPU usage percentage at or above which the run is aborted.

    Raises:
        RuntimeError: In the calling thread, once a threshold is reached. Before
            that, a KeyboardInterrupt is requested in the main thread.
    """
    import _thread  # stdlib; imported here so the cell's import list stays untouched

    while True:
        mem = psutil.virtual_memory()
        cpu = psutil.cpu_percent(interval=1)
        if mem.percent >= ram_threshold or cpu >= cpu_threshold:
            # An exception raised in a background thread only ends that thread; the
            # main thread keeps running. Ask for a KeyboardInterrupt in the main
            # thread so the guarded work is actually stopped.
            _thread.interrupt_main()
            raise RuntimeError(
                f"ERROR: Resource usage exceeded threshold!\n"
                f"RAM: {mem.percent:.2f}% used ({mem.used / (1024 ** 3):.2f} GB / {mem.total / (1024 ** 3):.2f} GB)\n"
                f"CPU: {cpu:.2f}% used"
            )
        time.sleep(2)  # Check every 2 seconds

# Start resource monitor in background
# Daemon thread running resource_monitor with 90% RAM and 90% CPU thresholds.
monitor_thread = threading.Thread(target=resource_monitor, args=(90, 90), daemon=True)
monitor_thread.start()

# 8-bit quantization config with the fp32 CPU offload flag enabled.
# NOTE(review): presumably needed because device_map="auto" may place some modules on
# the CPU when the GPU is too small — confirm.
bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_enable_fp32_cpu_offload=True  # critical fix!
)
model_path = "mistralai/Mistral-7B-Instruct-v0.1"
# Load from locally available files only; no tokenizer is loaded in this cell.
model = AutoModelForCausalLM.from_pretrained(
    model_path, 
    quantization_config=bnb_config,  # if supported
    device_map="auto", 
    local_files_only=True
)