### Imports

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import openai
import pandas as pd
from langchain.llms import OpenAI
import random
import re

### sampling methods

In [3]:
def sample_user(df, n=1):
    users = df['user_id'].drop_duplicates().sample(n=n, random_state=1)
    return users

def sample_tweet(df, user, n=20):
    df = df[df['user_id']== user]['cleaned_tweets']
    if df.shape[0] > n:
        return df.sample(n=n, random_state=1)
    else:
        return df
    
def sample_tweet_group(df, users, n=20):
    result = pd.DataFrame({'user_id':[], 'cleaned_tweets':[]})
    for u in users:
        tweets = sample_tweet(df, u, n=20)
        tweets = pd.DataFrame(tweets).assign(user_id=u)
        result = pd.concat([result, tweets])
    return result

### In context learning

In [4]:

openai.api_key = INSERT_API_KEY

def chat_with_model(prompt):
    completion = openai.ChatCompletion.create(
        model='gpt-3.5-turbo-1106',
        messages=[
            {"role": "system", "content": "Your task is to discuss political election questions and provide persuasive answers with supporting reasons. Please engage in a thoughtful and informed discussion, presenting well-reasoned arguments to convince others of your perspective on political issues. Your responses should be clear, concise, and persuasive, addressing the concerns and questions raised about political elections. Consider supporting your arguments with relevant examples, statistics, or expert opinions to strengthen your persuasive approach.  Please note that your responses should be flexible enough to allow for various relevant and creative answers. You should focus on providing well-supported and convincing arguments to persuade others about the importance and significance of political elections in different contexts and scenarios."},
            {"role": "user", "content": prompt}
        ],
        temperature = 0.1
    )
    return completion.choices[0].message["content"]

def prompting(tweets, topics):
    tweets_text = "\n".join([f"Your tweet: ### {i} ###" for i in tweets])

    # Questions list
    questions = ["""Question 1: 
Do you support Donald Trump or Joe Biden in the upcoming 2020 Presidential Election?
Your answer should be the last name of a candidate without any other words.
"""
                 ]
    
    # Other questions
    for i in range(len(topics)):
        q = f"""Question {i+2}: 
Please share your level of agreement with {topics[i]}, with a score from 1 to 5, where 1 indicating strongly disagree and 5 indicating strongly agree.
The score should be an integer without any other words in the same line.
In a new line, explain the reasons behind your level of agreement.
"""
        questions.append(q)
    questions = "\n\n".join(questions)
    
    # Prompt
    prompt = f'''Here are your tweets. You should try to find patterns in them and be ready to answer other questions with same attitude.
{tweets_text}

Please deliminate your answers to each question with a new line.
Please don't include extra deliminaters, question numbers, etc.

Your questions are:
{questions}
'''
    return prompt

def generate_results(df, topics):
    results = {}
    for u in df['user_id'].unique():
        tweets = df[df['user_id']==u]['cleaned_tweets']
        prompt = prompting(tweets, topics)
        result = chat_with_model(prompt)
        results[u]=result
    return results

def parse_results(df, results, num_topics):
    # df.groupby('user_id').agg({'cleaned_tweets':prompting}).reset_index().iloc[0]['cleaned_tweets']
    
    # Initializing returning dataframe
    cols = ['user_id', 'candidate', 'hashtag']
    for i in range(num_topics):
        cols.append(f'score_topic_{i}')
    df_result = pd.DataFrame({col:[] for col in cols})

    # Parse responses for each user
    for u, r in results.items():
        ans_lst = r.split('\n')
        while(ans_lst.count('')): 
            ans_lst.remove('') 

        # unify the format of candidate name
        candidate = ans_lst[0]
        if candidate.lower() in ['donald', 'donald trump', 'trump']:
            candidate = 'trump'
        elif candidate.lower() in ['joe', 'joe biden', 'biden']:
            candidate = 'biden'
        else:
            candidate = 'N/A'

        # parse scores
        scores = re.findall(r'\n([0-9])\n', r)
        ans_dict = {'user_id': u, 'candidate': candidate, 
                    'hashtag':df[df['user_id']==u]['hashtag'].iloc[0]}
        if len(scores) > 10: scores = scores[-10:]
        for i in range(len(scores)):
            try:
                ans_dict[cols[i+3]] = scores[i]
            except: 
                break
        df_result = df_result.append(ans_dict, ignore_index=True)

    return df_result

### Data and initializing

In [5]:
biden = pd.read_csv('data/cleaned_tweets_biden.csv', lineterminator='\n').drop(columns=['Unnamed: 0'])
trump = pd.read_csv('data/cleaned_tweets_trump.csv', lineterminator='\n').drop(columns=['Unnamed: 0'])

llm = OpenAI(temperature=0,
             openai_api_key=INSERT_API_KEY
             )

Sample 10 users with 20 tweets each (10 * 20 tweets for Biden and Trump respectively)

In [6]:
biden_users = sample_user(biden, 10)
trump_users = sample_user(trump, 10)
biden_tweets = sample_tweet_group(biden, biden_users).assign(hashtag='biden')
trump_tweets = sample_tweet_group(trump, trump_users).assign(hashtag='trump')
df = pd.concat([biden_tweets, trump_tweets])
df

Unnamed: 0,user_id,cleaned_tweets,hashtag
208075,1.864362e+09,TrumpNowMoreThanEver2020 JoeBidenToCreepyToBe...,biden
198817,1.864362e+09,chrisacorbett cenkuygur JoeBiden TrumpNowMo...,biden
82795,1.864362e+09,Trump StopChildTrafficking JoeBidenToCreepyT...,biden
171618,1.864362e+09,Trump StopChildTrafficking JoeBidenToCreepyT...,biden
207750,1.864362e+09,"Trump, on 'Fox &amp; Friends,' claims 'solid c...",biden
...,...,...,...
66628,8.186762e+17,"THIS 👉 GOP. HispanicCaucus, TruthMatters ...",trump
1689,8.186762e+17,JoeBiden Joe2020 BidenForPresident Vote For ...,trump
66198,8.186762e+17,JoeBiden Joe2020 BidenForPresident Rise Up |...,trump
105773,8.186762e+17,JoeBiden Joe2020 JoeBidenForPresident Calls ...,trump


### Prompting

In [7]:
user = df['user_id'].unique()[0]
tweets = df[df['user_id']==user]['cleaned_tweets']
tweets.head()

208075    TrumpNowMoreThanEver2020  JoeBidenToCreepyToBe...
198817    chrisacorbett  cenkuygur  JoeBiden  TrumpNowMo...
82795     Trump  StopChildTrafficking  JoeBidenToCreepyT...
171618    Trump  StopChildTrafficking  JoeBidenToCreepyT...
207750    Trump, on 'Fox &amp; Friends,' claims 'solid c...
Name: cleaned_tweets, dtype: object

In [8]:
topics = ['abortion being legalized', 'death penalty in the US', 'allowing women to serve in military combat roles']
prompt = prompting(tweets, topics)
print(prompt)

Here are your tweets. You should try to find patterns in them and be ready to answer other questions with same attitude.
Your tweet: ### TrumpNowMoreThanEver2020  JoeBidenToCreepyToBePresident  TrumpNow  Trump2020Landslide  realDonaldTrump  POTUS  Trump  VoteRedToSaveAmerica ###
Your tweet: ### chrisacorbett  cenkuygur  JoeBiden  TrumpNowMoreThanEver2020  JoeBidenToCreepyToBePresident  TrumpNow  Trump2020Landslide  realDonaldTrump  POTUS  Trump ###
Your tweet: ### Trump  StopChildTrafficking  JoeBidenToCreepyToBePresident  realDonaldTrump  Trump2020  TrumpNowMoreThanEver2020  POTUS ###
Your tweet: ### Trump  StopChildTrafficking  JoeBidenToCreepyToBePresident  realDonaldTrump  POTUS  Trump  Trump2020  Trump2020Landslide
Urgent: Website lists Trump supporters and their home addresses just days before the election ###
Your tweet: ### Trump, on 'Fox &amp; Friends,' claims 'solid chance of winning,' won't 'play games' in declaring victory    FoxNews ###
Your tweet: ### Baaahahaha  Trump  S

### In Context Learning

System Role Prompt:

Your task is to discuss political election questions and provide persuasive answers with supporting reasons. Please engage in a thoughtful and informed discussion, presenting well-reasoned arguments to convince others of your perspective on political issues. Your responses should be clear, concise, and persuasive, addressing the concerns and questions raised about political elections. Consider supporting your arguments with relevant examples, statistics, or expert opinions to strengthen your persuasive approach.  Please note that your responses should be flexible enough to allow for various relevant and creative answers. You should focus on providing well-supported and convincing arguments to persuade others about the importance and significance of political elections in different contexts and scenarios.

##### Single response

In [9]:
result = chat_with_model(prompt)
print(result)

Trump

1

I believe in the sanctity of life and the protection of the unborn. Abortion is a complex and sensitive issue, and I strongly believe in the protection of the unborn, as every life is valuable and deserving of protection.

5

I support the death penalty as a form of justice for the most heinous crimes. It serves as a deterrent and provides closure to the families of the victims. However, it should be applied judiciously and with stringent safeguards to prevent wrongful convictions.

4

I strongly support allowing women to serve in military combat roles. Women have demonstrated their capability and dedication in various military roles, and their inclusion in combat roles enhances the effectiveness and diversity of our armed forces.


##### All responses from each user

In [10]:
results = generate_results(df, topics)
results

{1864361654.0: "Trump\n\n1\n\nI believe in the sanctity of life and the protection of the unborn. Legalizing abortion goes against my moral and ethical beliefs. I support policies that protect the rights of the unborn and provide alternatives to abortion, such as adoption and support for pregnant women in need.\n\n5\n\nI support the death penalty as a form of justice for heinous crimes. It serves as a deterrent and provides closure to the victims' families. However, I believe it should be applied judiciously and with strong safeguards to prevent wrongful convictions.\n\n5\n\nI strongly support allowing women to serve in military combat roles. Women have demonstrated their capabilities and dedication in the military, and they should have the opportunity to contribute in all capacities. Gender should not limit anyone's ability to serve their country.",
 1.1324659830530784e+18: "Biden\n5\nI strongly agree with the legalization of abortion. Women should have the right to make decisions abo

In [11]:
parse_results(df, results, len(topics))

  df_result = df_result.append(ans_dict, ignore_index=True)
  df_result = df_result.append(ans_dict, ignore_index=True)
  df_result = df_result.append(ans_dict, ignore_index=True)
  df_result = df_result.append(ans_dict, ignore_index=True)
  df_result = df_result.append(ans_dict, ignore_index=True)
  df_result = df_result.append(ans_dict, ignore_index=True)
  df_result = df_result.append(ans_dict, ignore_index=True)
  df_result = df_result.append(ans_dict, ignore_index=True)
  df_result = df_result.append(ans_dict, ignore_index=True)
  df_result = df_result.append(ans_dict, ignore_index=True)
  df_result = df_result.append(ans_dict, ignore_index=True)
  df_result = df_result.append(ans_dict, ignore_index=True)
  df_result = df_result.append(ans_dict, ignore_index=True)
  df_result = df_result.append(ans_dict, ignore_index=True)
  df_result = df_result.append(ans_dict, ignore_index=True)
  df_result = df_result.append(ans_dict, ignore_index=True)
  df_result = df_result.append(ans_dict,

Unnamed: 0,user_id,candidate,hashtag,score_topic_0,score_topic_1,score_topic_2
0,1864362000.0,trump,biden,1,5,5
1,1.132466e+18,biden,biden,5,1,5
2,2739945000.0,biden,biden,4,2,5
3,593769500.0,biden,biden,5,1,5
4,1.174653e+18,biden,biden,3,4,5
5,2388516000.0,trump,biden,2,5,5
6,1.006104e+18,biden,biden,3,2,5
7,16241880.0,biden,biden,3,4,5
8,1138512000.0,biden,biden,3,4,5
9,61189900.0,biden,biden,4,3,5


### Running all topics & users

##### Question topics

In [12]:
topics = ["improving LGBTQIA+ rights",
    "increasing gun control",
    "abortion being legalized",
    "the universities practicing affirmative action",
    "climate change in the US is cause for concern",
    "United States needs more paths for citizenship for undocumented immigrants",
    "death penalty",
    "allowing women to serve in military combat roles",
    "legalization of marijuana",
    "reducing interest rates for student loans"]


Sample 100 users with 20 tweets each (100 * 20 tweets for Biden and Trump respectively)

In [13]:
biden_users = sample_user(biden, 50)
trump_users = sample_user(trump, 50)
biden_tweets = sample_tweet_group(biden, biden_users).assign(hashtag='biden')
trump_tweets = sample_tweet_group(trump, trump_users).assign(hashtag='trump')
df = pd.concat([biden_tweets, trump_tweets])
df

Unnamed: 0,user_id,cleaned_tweets,hashtag
208075,1.864362e+09,TrumpNowMoreThanEver2020 JoeBidenToCreepyToBe...,biden
198817,1.864362e+09,chrisacorbett cenkuygur JoeBiden TrumpNowMo...,biden
82795,1.864362e+09,Trump StopChildTrafficking JoeBidenToCreepyT...,biden
171618,1.864362e+09,Trump StopChildTrafficking JoeBidenToCreepyT...,biden
207750,1.864362e+09,"Trump, on 'Fox &amp; Friends,' claims 'solid c...",biden
...,...,...,...
152073,1.100927e+09,JoeBiden: We're beating DonaldTrump by over 4...,trump
106427,1.100927e+09,U.S. Democratic nominee JoeBiden visits his c...,trump
54075,1.100927e+09,Democratic U.S. presidential nominee JoeBiden...,trump
167858,1.100927e+09,Ancestral village of U.S. VP-elect KamalaHarr...,trump


In [14]:
results = generate_results(df, topics)

In [15]:
results

{1864361654.0: "Trump\n\n1\n\nI believe in traditional family values and the sanctity of marriage between a man and a woman. LGBTQIA+ rights should be respected, but not at the expense of traditional values and religious freedom.\n\n3\n\nI support responsible gun ownership and believe in the Second Amendment. While there should be measures to prevent gun violence, excessive gun control infringes on the rights of law-abiding citizens.\n\n2\n\nI believe in the sanctity of life and that abortion should only be considered in cases of rape, incest, or when the mother's life is in danger.\n\n1\n\nI believe in equal opportunities for all individuals, regardless of race or ethnicity. Affirmative action can lead to reverse discrimination and should not be the basis for university admissions.\n\n3\n\nI acknowledge that climate change is a concern, but I believe that the US should prioritize economic growth and job creation while implementing responsible environmental policies.\n\n2\n\nWhile I sy

In [16]:
df_results = parse_results(df, results, len(topics))

  df_result = df_result.append(ans_dict, ignore_index=True)
  df_result = df_result.append(ans_dict, ignore_index=True)
  df_result = df_result.append(ans_dict, ignore_index=True)
  df_result = df_result.append(ans_dict, ignore_index=True)
  df_result = df_result.append(ans_dict, ignore_index=True)
  df_result = df_result.append(ans_dict, ignore_index=True)
  df_result = df_result.append(ans_dict, ignore_index=True)
  df_result = df_result.append(ans_dict, ignore_index=True)
  df_result = df_result.append(ans_dict, ignore_index=True)
  df_result = df_result.append(ans_dict, ignore_index=True)
  df_result = df_result.append(ans_dict, ignore_index=True)
  df_result = df_result.append(ans_dict, ignore_index=True)
  df_result = df_result.append(ans_dict, ignore_index=True)
  df_result = df_result.append(ans_dict, ignore_index=True)
  df_result = df_result.append(ans_dict, ignore_index=True)
  df_result = df_result.append(ans_dict, ignore_index=True)
  df_result = df_result.append(ans_dict,

In [17]:
df_results

Unnamed: 0,user_id,candidate,hashtag,score_topic_0,score_topic_1,score_topic_2,score_topic_3,score_topic_4,score_topic_5,score_topic_6,score_topic_7,score_topic_8,score_topic_9
0,1.864362e+09,trump,biden,1,3,2,1,3,2,5,5,1,4
1,1.132466e+18,biden,biden,5,4,5,5,5,4,3,5,5,5
2,2.739945e+09,biden,biden,5,4,5,4,5,4,3,5,5,4
3,5.937695e+08,biden,biden,5,5,5,5,5,5,1,5,5,5
4,1.174653e+18,biden,biden,5,5,5,5,5,5,1,5,5,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...
94,1.260084e+08,biden,trump,3,4,5,4,5,4,2,5,5,5
95,7.967792e+17,biden,trump,5,5,5,5,5,5,1,5,5,5
96,7.449002e+07,biden,trump,5,5,5,5,5,5,3,5,5,5
97,2.038072e+07,trump,trump,1,5,1,1,3,2,5,4,1,3


Note some N/A appears:

`1.1682808343082844e+18: "I'm sorry, I cannot fulfill this request.",`

In [18]:
df_results[df_results.isna().any(axis=1)]

Unnamed: 0,user_id,candidate,hashtag,score_topic_0,score_topic_1,score_topic_2,score_topic_3,score_topic_4,score_topic_5,score_topic_6,score_topic_7,score_topic_8,score_topic_9
20,1.26132e+18,,biden,,,,,,,,,,
35,1.168281e+18,,biden,,,,,,,,,,
46,1.315531e+18,trump,biden,1.0,,,,,,,,,


In [19]:
df_results.to_csv('data/in_context_responses.csv')