<a href="https://colab.research.google.com/github/mkumar84/sandbox/blob/master/Generative_AI_based_Proactive_Risk_Prevention_solution.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from transformers import pipeline, GPT2LMHeadModel, GPT2Tokenizer
import requests


In [2]:
customer_data = pd.DataFrame({
    'customer_id': [1, 2, 3],
    'name': ['Alice', 'Bob', 'Charlie'],
    'location': ['Toronto', 'Vancouver', 'Calgary'],
    'assets_insured': ['Car', 'Home', 'Car, Home']
})


In [3]:
def fetch_weather_data(location):
    API_KEY = "your_weather_api_key"
    url = f"http://api.openweathermap.org/data/2.5/weather?q={location}&appid={API_KEY}"
    response = requests.get(url)
    if response.status_code == 200:
        return response.json()
    else:
        return None

# Example weather data for each customer
customer_data['weather_alert'] = customer_data['location'].apply(fetch_weather_data)


In [4]:
historical_claims = pd.DataFrame({
    'event': ['Hailstorm', 'Flood', 'Windstorm'],
    'damage_cost': [5000, 10000, 2000],
    'location': ['Toronto', 'Calgary', 'Vancouver'],
    'alert': ['Move car to covered space', 'Secure basement', 'Trim tree branches']
})


In [5]:
training_data = [
    "Event: Hailstorm, Location: Toronto -> Move your car to a covered space.",
    "Event: Flood, Location: Calgary -> Secure your basement to prevent water damage.",
    "Event: Windstorm, Location: Vancouver -> Trim tree branches to reduce risk of falling debris."
]

with open("training_data.txt", "w") as f:
    f.write("\n".join(training_data))


In [10]:
dataset = load_dataset("/content/training_data.txt", tokenizer)


In [18]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel, TextDataset, DataCollatorForLanguageModeling, Trainer, TrainingArguments
import os
import torch # Import torch

# Load pretrained GPT-2 model and tokenizer
model_name = "gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)

# Add the padding token to the tokenizer
tokenizer.pad_token = tokenizer.eos_token
model = GPT2LMHeadModel.from_pretrained(model_name)


# Prepare the dataset
def load_dataset(filepath, tokenizer):
    # Ensure the file exists and is not empty
    try:
        with open(filepath, "r") as f:
            content = f.read()
            if not content:
                raise ValueError(f"The file at {filepath} is empty.")
    except FileNotFoundError:
        raise FileNotFoundError(f"The file at {filepath} was not found.")

    # Tokenize the entire content of the file
    encodings = tokenizer(content, return_encodings=True)

    # Create a custom dataset class
    class CustomDataset(object):
        def __init__(self, encodings):
            self.encodings = encodings
        def __getitem__(self, idx):
            return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()} # Now torch is defined
        def __len__(self):
            return len(self.encodings.input_ids)

    # Return an instance of the custom dataset
    return CustomDataset(encodings)


# Correct file path (add a './' before to specify current dir)
dataset = load_dataset("./training_data.txt", tokenizer)
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Define training arguments
training_args = TrainingArguments(
    output_dir="./gpt2-risk-alerts",
    overwrite_output_dir=True,
    num_train_epochs=3,
    per_device_train_batch_size=4,
    save_steps=500,
    save_total_limit=2,
    prediction_loss_only=True
)

# Train the model
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    data_collator=data_collator
)
trainer.train()
model.save_pretrained("./gpt2-risk-alerts")
tokenizer.save_pretrained("./gpt2-risk-alerts")

Keyword arguments {'return_encodings': True} not recognized.


Step,Training Loss


('./gpt2-risk-alerts/tokenizer_config.json',
 './gpt2-risk-alerts/special_tokens_map.json',
 './gpt2-risk-alerts/vocab.json',
 './gpt2-risk-alerts/merges.txt',
 './gpt2-risk-alerts/added_tokens.json')

In [19]:
from transformers import pipeline

# Load fine-tuned model
risk_alert_model = pipeline("text-generation", model="./gpt2-risk-alerts")

# Generate alerts for customers
def generate_alert(event, location):
   input_text = f"Event: {event}, Location: {location} ->"
   alert = risk_alert_model(input_text, max_length=50, num_return_sequences=1)
   return alert[0]['generated_text'].split("->")[1]

# Example: Generating alerts
customer_data['proactive_alert'] = customer_data.apply(
   lambda row: generate_alert('Hailstorm', row['location']), axis=1
)
print(customer_data[['name', 'location', 'proactive_alert']])


Device set to use cpu
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


      name   location                                    proactive_alert
0    Alice    Toronto   Hailstorm, Damage Duration: 5 and above, dama...
1      Bob  Vancouver                            Rainstorm, Location (1 
2  Charlie    Calgary  , Location in a rainstorm. Hailstorm:\n\nDamag...


In [25]:
import smtplib
from email.mime.text import MIMEText

def send_email(to_email, subject, message):
    # Replace with your actual SMTP server and credentials
    smtp_server = "smtp.mail.com"  # For example, Gmail's SMTP server
    from_email = "maheshkumar@techie.com"  # Replace with your email address
    password = ""  # Replace with your email password

    msg = MIMEText(message)
    msg['Subject'] = subject
    msg['From'] = from_email
    msg['To'] = to_email

    with smtplib.SMTP(smtp_server, 587) as server:
        server.starttls()
        server.login(from_email, password)
        server.sendmail(from_email, to_email, msg.as_string())

# Example: Notify a customer
send_email("mahitherealtor@gmail.com", "Proactive Risk Alert", "A hailstorm is expected in your area. Move your car to a covered space.")

SMTPAuthenticationError: (535, b'Authentication credentials invalid')