## Importing necessary libraries

In [None]:
import openai
import json
import pandas as pd
import numpy as np
import re

from tqdm.notebook import tqdm_notebook
# Register tqdm's hooks on pandas so `.progress_apply` (used later in this notebook) is available.
tqdm_notebook.pandas()

## Loading the scraped dataframe

In [None]:
# Read the scraped dataframe back from disk, using the first CSV column as the index.
fc_df = pd.read_csv(
    "fc_df.csv",
    index_col=0,
    encoding="utf-8",
    low_memory=False,
)

## Preprocessing data

Here we extract the date from the "desc" column and convert it to a datetime type.

In [None]:
# Pull the first date-like token (word, digits, comma, digits -- e.g. "January 5, 2020")
# out of each description. `.str.extract` works on the whole column at once and yields
# NaN when a description is missing or contains no such token, whereas calling
# `re.search(...).group(0)` per row raised on the first description without a match.
# `pd.to_datetime` then turns those NaN entries into NaT.
fc_df["date"] = pd.to_datetime(
    fc_df["desc"].str.extract(r'(\w+\s\d+,\s\d+)', expand=False)
)

Here we convert each date to the number of days elapsed between 1-Jan-2000 and that date.

The underlying assumption is that politicians and other public figures do not care much about the season or the day of the year. Only certain specific dates matter to them (i.e., dates on which they are more prone to lie or to be honest, depending on the situation), so it makes sense to represent time as the number of days from some reference point.

In [None]:
# Whole days between each record's date and the 2000-01-01 reference point,
# computed on the entire column in one subtraction.
fc_df["date_num"] = (fc_df["date"] - pd.to_datetime("2000-01-01")).dt.days

Next we keep only the records dated on or before 31-Aug-2021, since that is the cutoff date for the OpenAI model.


In [None]:
# Day number 7913 (counted from 2000-01-01) corresponds to 31-Aug-2021;
# keep every record up to and including that day.
fc_df = fc_df.loc[fc_df["date_num"] <= 7913]

Since free requests to the OpenAI model API are limited, we cannot generate an AI response for the full dataset, so we take a random sample of 3000 records from it.

In [None]:
# Draw a reproducible random subset. The fixed seed makes a re-run of the notebook
# select the same records, and capping `n` at the frame length avoids the ValueError
# that `sample` raises when fewer than 3000 rows remain after filtering.
fc_df = fc_df.sample(n=min(3000, len(fc_df)), random_state=42)

## Getting API key 

If you want to replicate the results, register at https://platform.openai.com/ to obtain your own API key, then save it in a text file named 'apikey.txt' in the same directory as your code.

In [None]:
# Text file (expected alongside this notebook) that holds the OpenAI API key.
filename = "apikey.txt"

def get_file_contents(filename):
    """Return the text stored in `filename` with leading and trailing whitespace removed."""
    with open(filename, mode='r') as handle:
        contents = handle.read()
    # Strip so that a trailing newline in the file does not become part of the value.
    return contents.strip()
    
# Hand the key read from `filename` to the openai module so later requests can use it.
openai.api_key = get_file_contents(filename)

## Defining request function

In [4]:
def openai_request(string):
    """Ask the OpenAI completion model to summarise what it knows about `string`.

    Parameters
    ----------
    string : str
        Text appended to the fixed prompt prefix.

    Returns
    -------
    str
        Text of the first completion choice, with leading whitespace removed.

    Notes
    -----
    Uses the `openai.Completion` interface and expects `openai.api_key` to
    have been set before the first call.
    """
    response = openai.Completion.create(
        model="text-davinci-003",
        # The prompt wording (including its spelling) is kept verbatim: it is part
        # of the request, so editing it would change the generated text.
        prompt="Summerize all you know about this:  " + string,
        temperature=0,  # least-random sampling, for repeatable output
        max_tokens=100,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )

    # The response supports mapping-style access directly, so a
    # json.dumps/json.loads round trip is not needed to read it.
    response_text = response['choices'][0]['text']

    # Completions typically begin with blank lines. Strip leading whitespace
    # instead of slicing off a fixed two characters, which truncated real text
    # whenever the completion did not start with exactly two whitespace characters.
    return response_text.lstrip()

## Generating AI response as separate variable

In [None]:
# Build a new frame (leaving `fc_df` without the generated column) in which every
# `post` value has been sent through `openai_request`; tqdm shows progress meanwhile.
fc_df_with_ai_response = fc_df.assign(
    post_ai_response=fc_df['post'].progress_apply(openai_request)
)

## Removing useless columns

In [None]:
# 'desc' and 'date' are no longer needed once `date_num` exists, so drop both columns.
fc_df_with_ai_response = fc_df_with_ai_response.drop(columns=['desc', 'date'])

## Exporting the data as intermediate result

In [None]:
# Save the enriched frame (index included) as a UTF-8 CSV intermediate result.
fc_df_with_ai_response.to_csv(
    path_or_buf='fc_df2.csv',
    encoding='utf-8',
)