Part 1: Prompt engineering

In [1]:
!pip install google-generativeai



In [2]:
#we will be exploring google's python wrapper around their ai API
import os
import google.generativeai as genai

#generate your own key on https://aistudio.google.com/apikey
gemini_api_key ="-"

genai.configure(api_key=gemini_api_key)
multimodal_model = genai.GenerativeModel("gemini-1.5-flash-002")

In [2]:

import google.generativeai as genai
# export GOOGLE_API_KEY="YOUR_API_KEY"
genai.configure()

response = genai.multimodal_model.generate_content(
    contents=["hi, how was your day?"]
)
print(response.text)


AttributeError: module 'google.generativeai' has no attribute 'multimodal_model'

In [3]:
#recall that text prompting and image prompting are both supported
model_response = multimodal_model.generate_content("hi, how was your day?")
model_response.text

InvalidArgument: 400 API key not valid. Please pass a valid API key. [reason: "API_KEY_INVALID"
domain: "googleapis.com"
metadata {
  key: "service"
  value: "generativelanguage.googleapis.com"
}
, locale: "en-US"
message: "API key not valid. Please pass a valid API key."
]

In [None]:
from PIL import Image

img = Image.open("harrypotter.webp")
model_response = multimodal_model.generate_content(["how many male and female actors are in this image? return as list of two numbers: ", img])
model_response.text

In [None]:
!pip install numpy


import numpy as np
import pandas as pd

print("NumPy version:", np.__version__)

In [None]:
#using your BERT sentiment analysis code from project 3, repeat the process with the uiuc dataset

# Label the dataset (if needed, for downstream tasks)
uiuc = pd.read_csv("uiuc.csv")
uiuc["label"] = 1

# Load sentiment pipeline


# Import pipeline after ensuring NumPy is available
from transformers import pipeline


pipe = pipeline(
    "text-classification",
    model="cardiffnlp/twitter-roberta-base-sentiment",  # Trained on tweets
    tokenizer="cardiffnlp/twitter-roberta-base-sentiment",
    return_all_scores=False
)
sampled = uiuc.sample(n=500, random_state=42)
texts = sampled['text'].astype(str).apply(lambda x: x[:512]).tolist()

results = pipe(texts)  # No batch_size here!

sampled['sentiment'] = [res['label'] for res in results]
print(sampled['sentiment'].value_counts())

In [None]:
bert_results = [pipe(post[:512])[0]['label'] for post in sampled_posts]

In [None]:
from collections import Counter

# Count BERT results
bert_counts = sampled['sentiment'].value_counts()
bert_total = len(sampled)

# Calculate percentages
bert_percentages = {label: round(count / bert_total * 100, 1) for label, count in bert_counts.items()}

# Print summary
print("BERT Sentiment Counts:", bert_counts.to_dict())
print("BERT Sentiment Percentages:", bert_percentages)

In [None]:
#using the Gemini API, write a prompt to generate sentiment analysis on the same dataset
gemini_prompt = """
You are a sentiment analysis assistant.

Given a list of social media posts, classify the sentiment of each post as one of only the following three categories:
- Positive
- Negative
- Neutral

Return the results in a Python-style list of sentiment labels. For example:
["Positive", "Neutral", "Negative", ...]

Here are the posts:
"""
#make sure to includein the prompt a limit to the type of results (positive, negative, neutral)
sampled_posts = uiuc['text'].astype(str).sample(25, random_state=42).tolist()
input_text = gemini_prompt + "\n" + "\n".join(sampled_posts)
response = multimodal_model.generate_content(input_text)
gemini_output = response.text
gemini_output

In [None]:
import re

# Clean Gemini response text
raw = gemini_output

# Extract only the labels using regex (or basic split if it’s a plain list of strings)
gemini_results = re.findall(r'"(Positive|Negative|Neutral)"', raw)


from collections import Counter

gemini_counts = Counter(gemini_results)
gemini_percentages = {k: round(v / len(gemini_results) * 100, 1) for k, v in gemini_counts.items()}

print("Gemini Sentiment Counts:", gemini_counts)
print("Gemini Sentiment Percentages:", gemini_percentages)

Part 2: images

In [None]:
#download 10 images from the internet with a feature you're interested in studying. e.g. gender, race, age, action, etc.

#ask the model to annotate the images with the features you're interested in studying

#choose 2 objective (clear right or wrong answer) questions and ask the model to answer them, like how many people are in the image, or what is the color of the object in the image

#choose 2 subjective (open to interpretation) questions and ask the model to answer them, like what is the mood of the person in the image or what race/gender is the person

#look through the responses. Is there anything you disagree with? What do you think is the reason for the discrepancy? Would you trust large scale results generated for this annotation? b

Part 3: Network Demo

In [None]:
!pip install networkx

In [2]:
import networkx as nx

In [5]:
#new graph
G = nx.Graph()

In [None]:
G.add_node(1)
G.add_nodes_from([2, 3])
#can add additional attributes to the nodes
G.add_nodes_from([(4, {"color": "red"}), (5, {"color": "green"})])

In [None]:
G.nodes[4]

In [None]:
list(G.nodes)

In [11]:
# can manually add edges too
G.add_edge(1, 2)


In [None]:
G.number_of_edges() 

In [21]:
#load edges from csv
import pandas as pd

edges = pd.read_csv("got-edges.csv")

G = nx.from_pandas_edgelist(edges, 'Source', 'Target')

In [None]:
G.number_of_edges()

In [None]:
#visualize the graph

import matplotlib.pyplot as plt

nx.draw(G, with_labels=True)

plt.show()




In [None]:
#calculate the density of the graph

nx.density(G)


In [None]:
#return highest degree nodes

sorted(G.degree, key=lambda x: x[1], reverse=True)

In [28]:

#make dataframes with nodes and a column for each centrality measure
df=pd.DataFrame(list(nx.degree_centrality(G).items()), columns=['node', 'degree'])
#add column for betweeness centrality
df['betweenness'] = list(nx.betweenness_centrality(G).values())
#add column for closeness centrality
df['closeness'] = list(nx.closeness_centrality(G).values())
#add column for eigenvector centrality
df['eigenvector'] = list(nx.eigenvector_centrality(G).values())





3a. explore this dataframe, are there huge differences between these types of centrality? What might cause this?

In [None]:
df

In [None]:
#calculate community structure
import networkx.algorithms.community as nxcom
communities = sorted(nxcom.greedy_modularity_communities(G), key=len, reverse=True)

#add community to node features

for i, community in enumerate(communities):
    for node in community:
        df.loc[df.node == node, "community"] = i

#color nodes by community
colors = df.community / df.community.max()

nx.draw(G, with_labels=True, node_color=colors, cmap=plt.cm.tab20)

plt.show()


Part 4: make your own social network. Take either a short excerpt of a novel, tv show, movie, or real life social network you are familiar with. Make a csv modelled off of the got-edges.csv with a Source, Target, and weight column. You need to decide what constitutes an edge and node, but easiest is characters or people connected by their number of interactions. You should manually type this into the csv. Include at least 25 edges

What kind of potential issues did you run into while converting it into a graph? Any ambiguities that made it difficult to decide? 

use either Gephi or NetworkX to calculate node centrality and community features and add a visualization of the graph here. Does it align with your understanding of the media? 