In [1]:
## libraries
import requests
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import json
import os
import nltk
import re
import dotenv
import openai
import spacy
import networkx as nx
import gensim
import itertools
import matplotlib.pyplot as plt
import community as community_louvain
import random
import time
from tqdm import tqdm

from sklearn.metrics.pairwise import cosine_similarity
from spacy.matcher import Matcher
from openai import OpenAI as openAI
from nltk.tokenize import wordpunct_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from gensim.models.ldamulticore import LdaMulticore
from gensim import corpora, models
from gensim.models import Doc2Vec
from gensim.models.doc2vec import TaggedDocument
from itertools import chain
from langchain.agents.agent_types import AgentType
from langchain_community.llms import OpenAI as OpenAI_langchain
from langchain_experimental.agents.agent_toolkits import create_csv_agent
from os import makedirs
from os.path import join, exists
from datetime import date, timedelta
from collections import Counter
from langchain_openai import ChatOpenAI
from concurrent.futures import ThreadPoolExecutor

In [3]:
## setting up OpenAI API
API_KEY_HEADER = f"Authorization: Bearer {os.getenv('OPENAI_API_KEY')}"
openai.api_key = os.getenv('OPENAI_API_KEY')

In [3]:
## narratives
positive_narratives = [
    'The economy/specific sectors need(s) migrants',
    'Specific sectors/roles are dominated by migrants',
    'Migrants have a positive impact on public finances',
    'Migrants (or a specific migrant) are strong/a powerful force',
    'Migrants are well integrated',
    'The public is less opposed to/not opposed to/supportive of migration',
    'Migrants (or a specific migrant) are mistreated and that needs to stop',
    'Migrants (or a specific migrant) are vulnerable and need to be protected',
    'The UK compares well to another country on migration issues',
    'Systems fail migrants and need to be improved',
]

negative_narratives = [
    'Migrants cheat systems',
    'Migrants are linked to crime/antisocial/problematic behaviour',
    'The UK needs to deter migrants',
    'Levels of immigration/net migration to the UK are too high',
    'Migrants are a drain on public finances',
    'Migrants negatively affect local labour and drive local unemployment',
    'Migrants receive priority treatment over British citizens, which is bad',
    'The public is opposed to migration',
    'Migrants threaten national security',
    'Migrants negatively impact social cohesion by altering culture'
]

## Publications in the UK 
publications = [
    'The Guardian',
    'The Independent',
    'The Times',
    'The Telegraph',
    'The Sun',
    'The Daily Mail',
    'The Mirror',
    'The Express',
    'The Star',
    'The Metro',
    'The Evening Standard',
    'The Financial Times',
    'The Spectator',
    'The New Statesman',
    'The Economist',
    'The Conversation',
    'The BBC',
    'Sky News',
    'ITV News',
    'Channel 4 News',
    'The Huffington Post',
    'The Canary',
    'The Morning Star',
    'The Daily Express'
]

## Finalising generative architecture

In [7]:
## function to generate single article
def generate_article (i, narratives, publications, sentiment):

    client = openAI(
        api_key = os.environ.get("OPENAI_API_KEY4"),
    )

    publication = random.choice (publications)
    narrative = random.choice (narratives)
    passion_level = random.randint (5, 9)
    language_level = random.randint (5, 9)

    prompt = f"""You are a journalist for the UK news publication {publication}. When writing in a particular topic, 
        your job is to accurately and responsibly reflect the narratives present in the public consciousness at the time of writing.
        
        UK political discourse is currently focused on Brexit, which has made the topic of migration hotly debated and highly relevant. 
        
        In this political climate, you are tasked with writing an article on migration and Brexit, specifically alluding to the large migration flows 
        facilitated by EU schemes, how they impact the UK, and what would happen if migration was to decrease as a result of Brexit.

        Here are some instructions to guide your writing:
         
        - Your article should portray increased migration as a {sentiment} thing, specifically emulating the narrative that {narrative}. 
        - On a scale of 1-10, where 1 is neutral-sounding and 10 is very passionate, your tone should have a passion level of {passion_level}.
        - On a scale of 1-10, where 1 is simple language and 10 is verbose and complex, your language should be at a level of {language_level}.
        - Please give me the body text of the article only, no title needed.
        - Your article should provide a comprehensive analysis of both immediate and long-term impacts of migration on the UK, exploring economic, cultural, and societal dimensions.
        - Please include relevant data, statistics, and quotes from credible experts or studies to support your arguments.
        - Ensure that your article reflects multiple viewpoints on migration, discussing both the positive contributions and the challenges associated with it.
        - Incorporate personal stories or cultural aspects to highlight the human element of migration.
        - Your tone should be thoughtful and engaging.
        - Focus your analysis on how migration affects specific sectors such as healthcare, labor markets, or the cultural sector, providing detailed examples and case studies.

        Language Complexity Examples:

        0 (Super Simple):

        "Brexit led to changes in how people move between the UK and Europe."
        "Migration rules are different after Brexit."
        10 (Verbose and Complex):

        "The intricate labyrinth of legislative alterations catalyzed by Brexit profoundly reshaped the paradigms governing transnational mobility between the United Kingdom and the European Union."
        "The kaleidoscopic tapestry of policy shifts post-Brexit has engendered a nuanced landscape wherein the contours of migratory flux between the UK and Europe are painstakingly redefined."

        Passion Level Examples:

        0 (Very Neutral):

        "Brexit has influenced migration patterns."
        "People's views on Brexit vary."
        10 (Very Passionate and Fiery):

        "Brexit's impact on migration stirs profound emotions, evoking impassioned debates and fervent protests across Europe."
        "The divisive repercussions of Brexit on migration ignite fiery rhetoric, fueling passionate rallies and heartfelt pleas for unity and understanding."
        """

    response = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": prompt,
            }
        ],
        model = 'gpt-3.5-turbo',
        temperature = 0.5,
        max_tokens = 1000
    )

    article = response.choices[0].message.content
    #print(f"Article {i+1} generated successfully.")
    return article, publication, narrative, passion_level, language_level

## function to generate multiple articles with multithreading
def writeArticleGPT (narratives, publications, sentiment):

    df = pd.DataFrame (columns = ['body'])

    with ThreadPoolExecutor() as executor:
        results = list (
            tqdm (executor.map (
                generate_article, range(50), [narratives]*50, [publications]*50, [sentiment]*50
                ), 
                total = 50)
            )

    for article, publication, narrative, passion_level, language_level in results:
        
        df_temp = pd.DataFrame (
            {'body': [article], 
             'publication': [publication], 
             'narrative': [narrative], 
             'passion_level': [passion_level],
             'language_level': [language_level]}
            )
        df = pd.concat ([df, df_temp], ignore_index = True)

    return df

In [8]:
## create an empty DataFrame
all_synth_articles_positive = pd.DataFrame()

## running the function 6 times with a 1-minute pause between each run
for i in range(6):

    synth_articles_positive = writeArticleGPT (positive_narratives, 
                                               publications, 
                                               'positive')
    all_synth_articles_positive = pd.concat(
        [all_synth_articles_positive, synth_articles_positive], 
        ignore_index = True)
    
    # pausing for 1 minute (60 seconds) if it's not the last iteration
    if i != 5:
        time.sleep(60)

  0%|          | 0/50 [00:00<?, ?it/s]

100%|██████████| 50/50 [00:40<00:00,  1.24it/s]
100%|██████████| 50/50 [00:37<00:00,  1.33it/s]
100%|██████████| 50/50 [00:38<00:00,  1.29it/s]
100%|██████████| 50/50 [00:38<00:00,  1.31it/s]
100%|██████████| 50/50 [00:36<00:00,  1.38it/s]
100%|██████████| 50/50 [00:36<00:00,  1.35it/s]


In [9]:
## create an empty DataFrame
all_synth_articles_negative = pd.DataFrame()

## running the function 6 times with a 1-minute pause between each run
for i in range(6):

    synth_articles_negative = writeArticleGPT (negative_narratives,
                                               publications,
                                               'negative')
    all_synth_articles_negative = pd.concat(
        [all_synth_articles_negative, synth_articles_negative], 
        ignore_index = True)
    
    # pausing for 1 minute (60 seconds) if it's not the last iteration
    if i != 5:
        time.sleep(60)

100%|██████████| 50/50 [00:30<00:00,  1.62it/s]
100%|██████████| 50/50 [00:32<00:00,  1.52it/s]
100%|██████████| 50/50 [00:29<00:00,  1.68it/s]
100%|██████████| 50/50 [00:28<00:00,  1.73it/s]
100%|██████████| 50/50 [00:28<00:00,  1.72it/s]
100%|██████████| 50/50 [00:30<00:00,  1.65it/s]


In [11]:
## joining into one corpus
MigNar = pd.concat([all_synth_articles_negative, all_synth_articles_positive], ignore_index=True)

In [13]:
display (MigNar)

Unnamed: 0,body,publication,narrative,passion_level,language_level
0,Migration has been a contentious issue in the ...,The Times,Migrants are a drain on public finances,5.0,5.0
1,In the tumultuous landscape of post-Brexit Bri...,The Mirror,Migrants receive priority treatment over Briti...,9.0,7.0
2,"The influx of migrants into the UK, facilitate...",The Sun,Migrants negatively affect local labour and dr...,8.0,7.0
3,Migration has long been a contentious issue in...,ITV News,Migrants are linked to crime/antisocial/proble...,6.0,5.0
4,The intersection of migration and Brexit has i...,The Independent,Migrants cheat systems,9.0,7.0
...,...,...,...,...,...
595,Amidst the tumultuous landscape of Brexit nego...,The Daily Express,Migrants have a positive impact on public fina...,5.0,9.0
596,Amidst the tumultuous sea of Brexit negotiatio...,The Spectator,Migrants (or a specific migrant) are strong/a ...,9.0,9.0
597,The intricate interplay between migration and ...,Channel 4 News,The public is less opposed to/not opposed to/s...,5.0,9.0
598,Amidst the tumultuous landscape of Brexit nego...,The Independent,The public is less opposed to/not opposed to/s...,9.0,9.0


In [14]:
# Assuming MigNar is a pandas DataFrame
MigNar.to_csv('MigNar.csv', index = False)