In [1]:
import os
import sqlite3
from contextlib import closing

import pandas as pd

In [2]:
# Selects conversation_id and full_text for the first tweet (tweet_order = 1)
# of every conversation whose ConversationsCategory row still has the
# category 'No Category'.
QUERY_ALL = """
SELECT 
    c.conversation_id, 
    t.full_text
FROM 
    Conversations c
JOIN 
    Tweets t
ON 
    c.tweet_id = t.tweet_id
JOIN 
    ConversationsCategory cc
ON 
    c.conversation_id = cc.conversation_id
WHERE 
    c.tweet_order = 1
AND 
    cc.category = 'No Category';

"""

# Column name -> dtype mapping. Only ``full_text`` is active; entries for
# ``conversation_id`` ("int32") and ``tweet_id`` ("object") were disabled
# in the original cell.
DTYPES = dict(full_text="object")

# Company display name -> company id. Ids are stored as strings.
COMPANY_NAME_TO_ID = dict(
    [
        ("Klm", "56377143"),
        ("Air France", "106062176"),
        ("British Airways", "18332190"),
        ("American Air", "22536055"),
        ("Lufthansa", "124476322"),
        ("Air Berlin", "26223583"),
        ("Air Berlin assist", "2182373406"),
        ("easyJet", "38676903"),
        ("Ryanair", "1542862735"),
        ("Singapore Airlines", "253340062"),
        ("Qantas", "218730857"),
        ("Etihad Airways", "45621423"),
        ("Virgin Atlantic", "20626359"),
    ]
)

# Reverse lookup: company id -> company display name.
COMPANY_ID_TO_NAME = {
    company_id: company_name
    for company_name, company_id in COMPANY_NAME_TO_ID.items()
}

In [3]:
def get_local_data(query: str, path: str) -> pd.DataFrame:
    """Run ``query`` against the SQLite database at ``path``.

    Parameters
    ----------
    query : str
        SQL statement to execute. Its result set must contain a
        ``conversation_id`` column, which becomes the DataFrame index.
    path : str
        Filesystem path of the SQLite database file.

    Returns
    -------
    pd.DataFrame
        The query result, indexed by ``conversation_id``.
    """
    # sqlite3's own context manager only commits or rolls back the pending
    # transaction; it does NOT close the connection. ``closing`` guarantees
    # the handle is released, even when the query raises.
    with closing(sqlite3.connect(path)) as connection, connection:
        return pd.read_sql_query(query, connection,
                                 index_col='conversation_id')


In [4]:
# The database file is looked up one directory above the current working
# directory, at data_processed/local_backup.db.
path = os.path.join(os.path.dirname(os.getcwd()), "data_processed", "local_backup.db")

# Load the complete ConversationsCategory table from that database.
test_data = get_local_data("SELECT * from ConversationsCategory", path)

In [5]:
# NOTE(review): the displayed frame carries an "Unnamed: 0" column, which
# looks like a row index that was saved into the CSV — confirm whether it
# should be read with index_col=0 instead.
category_info = pd.read_csv(
    "test_sentiment_1.csv",
)

In [6]:
# Display the ConversationsCategory rows loaded from the local database.
test_data

Unnamed: 0_level_0,category,confidence
conversation_id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,technical difficulties,0.111812
2,booking problems,0.145168
3,booking problems,0.152093
4,baggage issues,0.184137
5,flight delays and cancellations,0.193860
...,...,...
493692,No Category,0.000000
493693,No Category,0.000000
493694,No Category,0.000000
493695,No Category,0.000000


In [7]:
# Display the category assignments read from test_sentiment_1.csv.
category_info

Unnamed: 0,conversation_id,category,confidence
0,422486,in-flight experience,0.346231
1,422487,flight information requests,0.176099
2,422488,flight information requests,0.293611
3,422489,flight information requests,0.161080
4,422490,check-in troubles,0.142027
...,...,...,...
70952,493692,lost luggage,0.156407
70953,493693,booking problems,0.150723
70954,493694,check-in troubles,0.148291
70955,493695,check-in troubles,0.122705


In [8]:
def connect_to_local_database(db_path: str):
    """Open a connection to the SQLite database at ``db_path``.

    Parameters
    ----------
    db_path : str
        Path of the SQLite database file.

    Returns
    -------
    sqlite3.Connection or None
        The open connection. If ``sqlite3.connect`` raises ``sqlite3.Error``
        the error is printed and ``None`` is returned instead, so callers
        must check the result before using it.
    """
    try:
        connection = sqlite3.connect(db_path)
    except sqlite3.Error as e:
        # Best-effort contract: report the problem and signal it with None.
        print(f"Error while connecting to SQLite: {e}")
        connection = None
    return connection

def update_text_local(batch, db_path: str) -> None:
    """Write category/confidence values into ``ConversationsCategory``.

    Parameters
    ----------
    batch : iterable of sequences
        One ``(category, confidence, conversation_id)`` triple per row to
        update, in exactly that order — it must match the placeholder order
        of the UPDATE statement below.
    db_path : str
        Path of the SQLite database file.

    Raises
    ------
    sqlite3.OperationalError
        If the database connection could not be opened.
    sqlite3.Error
        If the update itself fails; the transaction is rolled back first.
    """
    update_query = "UPDATE ConversationsCategory SET category = ?, confidence = ? WHERE conversation_id = ?"

    connection = connect_to_local_database(db_path)
    if connection is None:
        # The helper signals a failed connect by returning None; fail with a
        # clear database error instead of an AttributeError on ``None``.
        raise sqlite3.OperationalError(
            f"Could not open SQLite database at {db_path!r}"
        )

    # ``closing`` releases the connection when done; the inner ``connection``
    # context commits on success and rolls back if executemany raises.
    with closing(connection), connection:
        connection.executemany(update_query, batch)

In [9]:
# Column order matters: it must match the placeholder order of the UPDATE
# statement in update_text_local (category, confidence, conversation_id).
to_paste = (
    category_info[["category", "confidence", "conversation_id"]]
    .values
    .tolist()
)

In [10]:
# Write the categories from the CSV into the local database's
# ConversationsCategory table.
update_text_local(batch=to_paste, db_path=path)