In [1]:
!pip install -q -U google-generativeai

In [27]:
import pandas as pd
import google.generativeai as genai
from textblob import TextBlob
from google.colab import files
import time

# Configure Gemini
# SECURITY: an API key used to be hardcoded on this line and was saved with the
# notebook. Treat that key as leaked and revoke it. The key is now taken from
# the GOOGLE_API_KEY environment variable, falling back to a non-echoing
# interactive prompt, so it is never written into the notebook file.
import os
from getpass import getpass

GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY") or getpass("Gemini API key: ")
if not GOOGLE_API_KEY:
    raise ValueError("A Gemini API key is required (set GOOGLE_API_KEY or enter it at the prompt).")
genai.configure(api_key=GOOGLE_API_KEY)

# Upload the CSV file
uploaded = files.upload()
if not uploaded:
    # With nothing uploaded, next(iter(uploaded)) below would raise a bare
    # StopIteration; fail with an actionable message instead.
    raise ValueError("No file was uploaded; re-run the cell and choose a CSV file.")

# Load the uploaded CSV file (only the first uploaded entry is used)
filename = next(iter(uploaded))
df = pd.read_csv(filename)

# The scoring loop reads row['News'] and row['Tags'], so verify both columns
# exist now rather than failing after the first API call.
missing_columns = {'News', 'Tags'} - set(df.columns)
if missing_columns:
    raise ValueError(f"{filename} is missing required column(s): {sorted(missing_columns)}")

# Print DataFrame info
print(df.columns)
print(df.head())

# Gemini model setup (temperature 0 requests the least random decoding)
model = genai.GenerativeModel(model_name='gemini-1.5-pro', generation_config={"temperature": 0})

# Instruction preamble that analyze_content() prepends to every per-article
# prompt. It asks the model for a single integer in [-10, 10].
# NOTE(review): the wording mixes "relevance" with positive/negative "impact";
# confirm which of the two the score is meant to capture before changing it.
system_prompt = """
You are an AI assistant specialized in analyzing news articles related to the cement sector.
Your task is to evaluate the relevance of the content within these articles (direct or indirect relevance).

For each article, you should:
Assign a relevance score between -10 and 10, where:
   - -10 indicates a highly negative impact or relevance between the news and associated tags (you can take the context of the tags from within the news)
   - 0 indicates neutral or no relevance between the news and associated tags (you can take the context of the tags from within the news)
   - 10 indicates a highly positive impact or relevance between the news and associated tags (you can take the context of the tags from within the news)

The relevance can be highly direct or somewhat indirect relevance.

Provide your response as a single integer score for each article.
"""

def analyze_content(news, tags, retries=3):
    """Score one news item with the Gemini model, falling back to TextBlob.

    Sends ``system_prompt`` plus the article and its tags to the module-level
    ``model`` and parses the first whitespace-delimited token of the reply as
    an integer.

    Args:
        news: Article text.
        tags: Tags associated with the article; interpolated into the prompt as-is.
        retries: Maximum number of model calls before the fallback is used.

    Returns:
        int: The model's score clamped to [-10, 10]. If no attempt produces a
        parseable integer, ``int(polarity * 10)`` from TextBlob is returned, or
        0 when ``news`` is not a string (e.g. NaN from an empty CSV cell).

    Raises:
        Any exception raised by ``model.generate_content`` itself; those are
        deliberately not swallowed here.
    """
    prompt = f"News: {news}\nTags: {tags}\n\nBased on the above, provide a relevance score between -10 and 10 for this content in the context of the cement sector. Only provide the score as a single integer."

    for attempt in range(retries):
        response = model.generate_content(system_prompt + "\n" + prompt)

        # Reading .text can itself raise ValueError when the response carries
        # no text part, so it must not be re-read inside the failure handler.
        try:
            reply = response.text.strip()
        except ValueError as exc:
            print(f"Attempt {attempt + 1} failed: {exc}")
            continue

        tokens = reply.split()
        try:
            # IndexError covers an empty reply, ValueError a non-integer token.
            return max(-10, min(10, int(tokens[0])))
        except (IndexError, ValueError):
            print(f"Attempt {attempt + 1} failed: {reply}")

    # Fallback: TextBlob only accepts strings, so treat anything else as neutral.
    if not isinstance(news, str):
        return 0
    sentiment = TextBlob(news).sentiment.polarity
    return int(sentiment * 10)

# Process the data in batches
results = []

BATCH_SIZE = 2
SLEEP_SECONDS = 60  # pause between batches (presumably for API rate limits - confirm)
start_batch = 0   # zero-based index of the first batch in this run (i.e. batch 1)
end_batch = 15    # exclusive bound: this run covers batches 1-15

output_filename = "enhanced_cement_sector_news_events2.csv"

first_row = start_batch * BATCH_SIZE
stop_row = min(end_batch * BATCH_SIZE, len(df))

try:
    for i in range(first_row, stop_row, BATCH_SIZE):
        batch = df.iloc[i:i+BATCH_SIZE]

        for index, row in batch.iterrows():
            news = row['News']
            tags = row['Tags']
            event_score = analyze_content(news, tags)
            # Record each row as soon as it is scored so that a failure later
            # in the run cannot discard it.
            results.append({
                'event_id': index,
                'news': news,
                'tags': tags,
                'event_score': event_score
            })

        # Pause only when another batch follows in this run; comparing against
        # len(df) added a pointless wait after the final batch of a partial run.
        if i + BATCH_SIZE < stop_row:
            print(f"Waiting {SLEEP_SECONDS} seconds after processing batch {(i // BATCH_SIZE) + 1}...")
            time.sleep(SLEEP_SECONDS)
finally:
    # Create and save the enhanced dataset even if the loop was interrupted,
    # so rows that were already scored (and paid for) are kept.
    enhanced_df = pd.DataFrame(results)
    enhanced_df.to_csv(output_filename, index=False)

# Download the enhanced dataset
files.download(output_filename)
print(f"\nEnhanced dataset saved and downloaded as '{output_filename}'")


Saving week7task3badfeaturesresults.csv to week7task3badfeaturesresults (24).csv
Index(['News', 'Tags'], dtype='object')
                                                News  \
0  ISLAMABAD: The KSE-100 index of the Pakistan S...   
1  ISLAMABAD: The exchange rate of Pakistani rupe...   
2  ISLAMABAD: The exchange rate of Pakistani rupe...   
3  ISLAMABAD: The KSE-100 index of the Pakistan S...   
4  ISLAMABAD: The exchange rate of Pakistani rupe...   

                                                Tags  
0  ['Price of shares stood at Rs23.514 billion ag...  
1  ['Rupee strengthened by 55 paisas against the ...  
2  ['Rupee strengthened by 55 paisas against the ...  
3  ['Price of shares stood at Rs23.514 billion ag...  
4  ['Rupee strengthened by 55 paisas against the ...  
Waiting 60 seconds after processing batch 1...
Waiting 60 seconds after processing batch 2...
Waiting 60 seconds after processing batch 3...
Waiting 60 seconds after processing batch 4...
Waiting 60 seconds after

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


Enhanced dataset saved and downloaded as 'enhanced_cement_sector_news_events2.csv'


In [31]:
import pandas as pd
import google.generativeai as genai
from textblob import TextBlob
from google.colab import files
import time

# Configure Gemini
# SECURITY: an API key used to be hardcoded on this line and was saved with the
# notebook. Treat that key as leaked and revoke it. The key is now taken from
# the GOOGLE_API_KEY environment variable, falling back to a non-echoing
# interactive prompt, so it is never written into the notebook file.
import os
from getpass import getpass

GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY") or getpass("Gemini API key: ")
if not GOOGLE_API_KEY:
    raise ValueError("A Gemini API key is required (set GOOGLE_API_KEY or enter it at the prompt).")
genai.configure(api_key=GOOGLE_API_KEY)

# Upload the CSV file
uploaded = files.upload()
if not uploaded:
    # With nothing uploaded, next(iter(uploaded)) below would raise a bare
    # StopIteration; fail with an actionable message instead.
    raise ValueError("No file was uploaded; re-run the cell and choose a CSV file.")

# Load the uploaded CSV file (only the first uploaded entry is used)
filename = next(iter(uploaded))
df = pd.read_csv(filename)

# The scoring loop reads row['News'] and row['Tags'], so verify both columns
# exist now rather than failing after the first API call.
missing_columns = {'News', 'Tags'} - set(df.columns)
if missing_columns:
    raise ValueError(f"{filename} is missing required column(s): {sorted(missing_columns)}")

# Print DataFrame info
print(df.columns)
print(df.head())

# Gemini model setup (temperature 0 requests the least random decoding)
model = genai.GenerativeModel(model_name='gemini-1.5-pro', generation_config={"temperature": 0})

# Instruction preamble that analyze_content() prepends to every per-article
# prompt. It asks the model for a single integer in [-10, 10].
# NOTE(review): the wording mixes "relevance" with positive/negative "impact";
# confirm which of the two the score is meant to capture before changing it.
system_prompt = """
You are an AI assistant specialized in analyzing news articles related to the cement sector.
Your task is to evaluate the relevance of the content within these articles (direct or indirect relevance).

For each article, you should:
Assign a relevance score between -10 and 10, where:
   - -10 indicates a highly negative impact or relevance between the news and associated tags (you can take the context of the tags from within the news)
   - 0 indicates neutral or no relevance between the news and associated tags (you can take the context of the tags from within the news)
   - 10 indicates a highly positive impact or relevance between the news and associated tags (you can take the context of the tags from within the news)

The relevance can be highly direct or somewhat indirect relevance.

Provide your response as a single integer score for each article.
"""

def analyze_content(news, tags, retries=3):
    """Score one news item with the Gemini model, falling back to TextBlob.

    Sends ``system_prompt`` plus the article and its tags to the module-level
    ``model`` and parses the first whitespace-delimited token of the reply as
    an integer.

    Args:
        news: Article text.
        tags: Tags associated with the article; interpolated into the prompt as-is.
        retries: Maximum number of model calls before the fallback is used.

    Returns:
        int: The model's score clamped to [-10, 10]. If no attempt produces a
        parseable integer, ``int(polarity * 10)`` from TextBlob is returned, or
        0 when ``news`` is not a string (e.g. NaN from an empty CSV cell).

    Raises:
        Any exception raised by ``model.generate_content`` itself; those are
        deliberately not swallowed here.
    """
    prompt = f"News: {news}\nTags: {tags}\n\nBased on the above, provide a relevance score between -10 and 10 for this content in the context of the cement sector. Only provide the score as a single integer."

    for attempt in range(retries):
        response = model.generate_content(system_prompt + "\n" + prompt)

        # Reading .text can itself raise ValueError when the response carries
        # no text part, so it must not be re-read inside the failure handler.
        try:
            reply = response.text.strip()
        except ValueError as exc:
            print(f"Attempt {attempt + 1} failed: {exc}")
            continue

        tokens = reply.split()
        try:
            # IndexError covers an empty reply, ValueError a non-integer token.
            return max(-10, min(10, int(tokens[0])))
        except (IndexError, ValueError):
            print(f"Attempt {attempt + 1} failed: {reply}")

    # Fallback: TextBlob only accepts strings, so treat anything else as neutral.
    if not isinstance(news, str):
        return 0
    sentiment = TextBlob(news).sentiment.polarity
    return int(sentiment * 10)

# Process the data in batches
results = []

BATCH_SIZE = 2
SLEEP_SECONDS = 60  # pause between batches (presumably for API rate limits - confirm)
start_batch = 15  # Start from batch 16 (i.e., after batch 15)
end_batch = (len(df) + BATCH_SIZE - 1) // BATCH_SIZE  # ceil division: run through the last batch

output_filename = "enhanced_cement_sector_news_events.csv"

first_row = start_batch * BATCH_SIZE
stop_row = min(end_batch * BATCH_SIZE, len(df))

try:
    for i in range(first_row, stop_row, BATCH_SIZE):
        batch = df.iloc[i:i+BATCH_SIZE]

        for index, row in batch.iterrows():
            news = row['News']
            tags = row['Tags']
            event_score = analyze_content(news, tags)
            # Record each row as soon as it is scored so that a failure later
            # in the run cannot discard it.
            results.append({
                'event_id': index,
                'news': news,
                'tags': tags,
                'event_score': event_score
            })

        # Pause only when another batch follows in this run.
        if i + BATCH_SIZE < stop_row:
            print(f"Waiting {SLEEP_SECONDS} seconds after processing batch {(i // BATCH_SIZE) + 1}...")
            time.sleep(SLEEP_SECONDS)
finally:
    # Create and save the enhanced dataset even if the loop was interrupted,
    # so rows that were already scored (and paid for) are kept.
    enhanced_df = pd.DataFrame(results)
    enhanced_df.to_csv(output_filename, index=False)

# Download the enhanced dataset
files.download(output_filename)
print(f"\nEnhanced dataset saved and downloaded as '{output_filename}'")


Saving week7task3badfeaturesresults.csv to week7task3badfeaturesresults (26).csv
Index(['News', 'Tags'], dtype='object')
                                                News  \
0  ISLAMABAD: The KSE-100 index of the Pakistan S...   
1  ISLAMABAD: The exchange rate of Pakistani rupe...   
2  ISLAMABAD: The exchange rate of Pakistani rupe...   
3  ISLAMABAD: The KSE-100 index of the Pakistan S...   
4  ISLAMABAD: The exchange rate of Pakistani rupe...   

                                                Tags  
0  ['Price of shares stood at Rs23.514 billion ag...  
1  ['Rupee strengthened by 55 paisas against the ...  
2  ['Rupee strengthened by 55 paisas against the ...  
3  ['Price of shares stood at Rs23.514 billion ag...  
4  ['Rupee strengthened by 55 paisas against the ...  
Waiting 60 seconds after processing batch 16...
Waiting 60 seconds after processing batch 17...
Waiting 60 seconds after processing batch 18...
Waiting 60 seconds after processing batch 19...
Waiting 60 seconds a

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


Enhanced dataset saved and downloaded as 'enhanced_cement_sector_news_events.csv'
