In [2]:
import pandas as pd
from google.cloud import bigquery

def load_data_to_bq(
        data: pd.DataFrame,
        gcp_project:str,
        bq_dataset:str,
        table: str,
        truncate: bool
    ) -> None:
    """Upload a DataFrame to a BigQuery table and wait for the load job to end.

    The destination is addressed as ``{gcp_project}.{bq_dataset}.{table}``.

    Args:
        data: Rows to upload.
        gcp_project: Project id, first part of the destination table name.
        bq_dataset: Dataset id, second part of the destination table name.
        table: Table id, last part of the destination table name.
        truncate: When True the load job uses ``WRITE_TRUNCATE``; otherwise
            it uses ``WRITE_APPEND``.

    Returns:
        None. Progress is reported with ``print``.

    Raises:
        Nothing is caught here: any error raised by the BigQuery client while
        creating the client, submitting the job or waiting on it propagates
        to the caller.
    """
    full_table_name = f"{gcp_project}.{bq_dataset}.{table}"
    # NOTE(review): the client is created without an explicit project, so the
    # project/credentials it uses come from the environment — confirm intended.
    client = bigquery.Client()

    # Choose the write disposition; no schema is passed to the job config.
    write_mode = "WRITE_TRUNCATE" if truncate else "WRITE_APPEND"
    job_config = bigquery.LoadJobConfig(write_disposition=write_mode)

    print(f"\n{'Write' if truncate else 'Append'} {full_table_name} ({data.shape[0]} rows)")

    # Submit the load job, then block until it finishes. The value returned
    # by result() is not needed, so it is not bound to a name.
    job = client.load_table_from_dataframe(data, full_table_name, job_config=job_config)
    job.result()

    print(f"✅ Data saved to bigquery, with shape {data.shape}")


In [3]:
# Load the speeches dataset from a local CSV file into a DataFrame.
# NOTE(review): the path is absolute and machine-specific — confirm it before
# running this notebook on another machine.
local_file = pd.read_csv(
    filepath_or_buffer='/home/ricorenzo/code/renzorico/raw_data_project/data_st.csv'
)
# Disabled cleanup step, kept for reference:
# local_file = local_file.drop(['Unnamed: 0.1', 'Unnamed: 0'], axis=1)

In [4]:
# Show the loaded frame as the cell output for a quick look at its columns and rows.
local_file

Unnamed: 0,iso,year,speeches,country,cleaned_speeches,preprocessed_speech,topic_num,topic,countries_recoded,top_5_words,bert_topic,bert_prob,ber_topic_words,decade,year_range,continent
0,BEL,1950,Never before have men throughout the world mor...,Belgium,never before have men throughout the world mor...,never man throughout world fervently unanimous...,3,war,"['South Korea', 'New York']",['four year' 'south korea' 'month ago' 'upon w...,-1,0.00000,"['united', 'nations', 'united nations', 'inter...",1950s,1946-1970,Europe
1,BEL,1950,"We have used these great, just and noble words...",Belgium,we have used these great just and noble words ...,use great noble word much become wear eye lose...,3,war,[],['charter united' 'charter united nations' 'he...,5,1.00000,"['world', 'peace', 'war', 'we', 'man', 'people...",1950s,1946-1970,Europe
2,BEL,1950,"These are not, of course, the only lessons of ...",Belgium,these are not of course the only lessons of th...,course lesson korean affair would well weigh s...,3,war,['Korea'],['government consider' 'zone peace' 'united na...,8,0.59265,"['korea', 'korean', 'republic korea', 'peninsu...",1950s,1946-1970,Europe
3,BEL,1950,"From the first day, the general debate in the ...",Belgium,from the first day the general debate in the a...,first day general debate assembly justify high...,14,multilateralism,"['Netherlands', 'Chile', 'United States', 'USS...",['soviet union' 'general debate' 'delegation w...,-1,0.00000,"['united', 'nations', 'united nations', 'inter...",1950s,1946-1970,Europe
4,BEL,1950,I had listened attentively to Mr. Vyshinsky’s ...,Belgium,i had listened attentively to mr vyshinsky’s s...,listen attentively mr vyshinsky ’s speech th m...,14,multilateralism,['the Soviet Union'],['cooperation among nation' 'problem united' '...,1,1.00000,"['nuclear', 'disarmament', 'weapon', 'nuclear ...",1950s,1946-1970,Europe
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
117491,HUN,2014,"Two years ago here in New York, Hurricane Sand...",Hungary,two years ago here in new york hurricane sandy...,two year ago new york hurricane sandy cause da...,1,climate,"['New York', 'New York City', 'New York', 'New...",['despite fact' 'among other' 'ensure success'...,-1,0.00000,"['united', 'nations', 'united nations', 'inter...",2010s,1990-today,Europe
117492,HUN,2014,Why can we not listen to the wisdom and advice...,Hungary,why can we not listen to the wisdom and advice...,listen wisdom advice scientist accept scientif...,1,climate,[],['government people republic' 'government repu...,-1,0.00000,"['united', 'nations', 'united nations', 'inter...",2010s,1990-today,Europe
117493,HUN,2014,"I note, among others, the high incidence of de...",Hungary,i note among others the high incidence of derm...,note among other high incidence dermatological...,1,climate,"['Montreal', 'United States', 'Paris', 'Montre...",['next year' 'become increasingly' 'last week'...,-1,0.00000,"['united', 'nations', 'united nations', 'inter...",2010s,1990-today,Europe
117494,HUN,2014,"Only a few days ago, we read that the carbon d...",Hungary,only a few days ago we read that the carbon di...,day ago read carbon dioxide concentration neve...,1,climate,['New York City'],['gross domestic product' 'hundred million' 't...,6,1.00000,"['climate', 'climate change', 'change', 'islan...",2010s,1990-today,Europe


In [5]:
# Keep only the rows that have both a country and a speech text.
local_file = local_file.dropna(subset=['country', 'speeches'])

In [6]:
# Cast the year column to integers, leaving every other column as it is.
local_file = local_file.astype({'year': int})


In [7]:
# Preview the first rows of the frame as the cell output.
local_file.head()

Unnamed: 0,iso,year,speeches,country,cleaned_speeches,preprocessed_speech,topic_num,topic,countries_recoded,top_5_words,bert_topic,bert_prob,ber_topic_words,decade,year_range,continent
0,BEL,1950,Never before have men throughout the world mor...,Belgium,never before have men throughout the world mor...,never man throughout world fervently unanimous...,3,war,"['South Korea', 'New York']",['four year' 'south korea' 'month ago' 'upon w...,-1,0.0,"['united', 'nations', 'united nations', 'inter...",1950s,1946-1970,Europe
1,BEL,1950,"We have used these great, just and noble words...",Belgium,we have used these great just and noble words ...,use great noble word much become wear eye lose...,3,war,[],['charter united' 'charter united nations' 'he...,5,1.0,"['world', 'peace', 'war', 'we', 'man', 'people...",1950s,1946-1970,Europe
2,BEL,1950,"These are not, of course, the only lessons of ...",Belgium,these are not of course the only lessons of th...,course lesson korean affair would well weigh s...,3,war,['Korea'],['government consider' 'zone peace' 'united na...,8,0.59265,"['korea', 'korean', 'republic korea', 'peninsu...",1950s,1946-1970,Europe
3,BEL,1950,"From the first day, the general debate in the ...",Belgium,from the first day the general debate in the a...,first day general debate assembly justify high...,14,multilateralism,"['Netherlands', 'Chile', 'United States', 'USS...",['soviet union' 'general debate' 'delegation w...,-1,0.0,"['united', 'nations', 'united nations', 'inter...",1950s,1946-1970,Europe
4,BEL,1950,I had listened attentively to Mr. Vyshinsky’s ...,Belgium,i had listened attentively to mr vyshinsky’s s...,listen attentively mr vyshinsky ’s speech th m...,14,multilateralism,['the Soviet Union'],['cooperation among nation' 'problem united' '...,1,1.0,"['nuclear', 'disarmament', 'weapon', 'nuclear ...",1950s,1946-1970,Europe


In [11]:
# NOTE(review): scratch expression — its result is only displayed, never stored,
# and its execution count (11) is out of sequence with the surrounding cells.
# Candidate for removal before sharing the notebook.
eval('2 + 2')

4

In [8]:
import ast

# The CSV stores each list of countries as its string form (for example
# "['South Korea', 'New York']"); turn those strings back into Python lists.
# Only string values are parsed. Anything else (a list produced by an earlier
# run of this cell, or a non-string missing value) is passed through unchanged,
# so re-running the cell does not make ast.literal_eval raise.
local_file['countries_recoded'] = local_file['countries_recoded'].apply(
    lambda value: ast.literal_eval(value) if isinstance(value, str) else value
)

In [9]:

# Upload the prepared frame. truncate=True selects WRITE_TRUNCATE in
# load_data_to_bq rather than WRITE_APPEND.
load_data_to_bq(
    data=local_file,
    gcp_project='lewagon-bootcamp-384011',
    bq_dataset='production_dataset',
    table='speeches',
    truncate=True,
)



Write lewagon-bootcamp-384011.production_dataset.speeches (117496 rows)
✅ Data saved to bigquery, with shape (117496, 16)


In [10]:
# Print the frame summary: index range, column names, non-null counts and dtypes.
local_file.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 117496 entries, 0 to 117495
Data columns (total 16 columns):
 #   Column               Non-Null Count   Dtype  
---  ------               --------------   -----  
 0   iso                  117496 non-null  object 
 1   year                 117496 non-null  int64  
 2   speeches             117496 non-null  object 
 3   country              117496 non-null  object 
 4   cleaned_speeches     117496 non-null  object 
 5   preprocessed_speech  117496 non-null  object 
 6   topic_num            117496 non-null  int64  
 7   topic                117496 non-null  object 
 8   countries_recoded    117496 non-null  object 
 9   top_5_words          117496 non-null  object 
 10  bert_topic           117496 non-null  int64  
 11  bert_prob            117496 non-null  float64
 12  ber_topic_words      117496 non-null  object 
 13  decade               117496 non-null  object 
 14  year_range           117496 non-null  object 
 15  continent        