# Export BBC and BBCSports from PostgreSQL DB to CSV

In [1]:
import os
from urllib.parse import quote_plus

import pandas as pd

# conda install -c conda-forge python-dotenv
from dotenv import load_dotenv

# conda install -c anaconda sqlalchemy
from sqlalchemy import create_engine, text

In [2]:
# Let pandas render up to 1000 rows before truncating a displayed frame
pd.set_option("display.max_rows", 1000)

In [3]:
# Read the settings stored in the .env file so the os.getenv(...) lookups
# in the next cell can find the DB name, credentials and schema.
load_dotenv() # => True if no error

True

In [4]:
# Load secrets from the .env file
db_name = os.getenv("db_name")
db_username = os.getenv("db_username")
db_password = os.getenv("db_password")
db_table_schema = os.getenv("db_table_schema")

# Fail fast with a readable message instead of silently building a URL that
# contains the literal string "None" (os.getenv returns None for unset keys).
# Only the setting names are reported, never their values.
_required_settings = {
    "db_name": db_name,
    "db_username": db_username,
    "db_password": db_password,
    "db_table_schema": db_table_schema,
}
_missing_settings = [key for key, value in _required_settings.items() if value is None]
if _missing_settings:
    raise RuntimeError(
        "Missing required setting(s) in the environment/.env file: "
        + ", ".join(_missing_settings)
    )

# Build the SQLAlchemy URL for the local PostgreSQL server.
# - "postgresql://" is the supported dialect name; the "postgres://" alias was
#   removed in SQLAlchemy 1.4 and raises NoSuchModuleError there.
# - The credentials are URL-encoded so characters such as "@", ":" or "/" in the
#   username/password cannot corrupt the URL. This assumes the .env file stores
#   the raw (not already percent-encoded) values.
connection_string = (
    f"postgresql://{quote_plus(db_username)}:{quote_plus(db_password)}"
    f"@localhost:5432/{db_name}"
)
engine = create_engine(connection_string)

In [5]:
# List of available tables in the DB
# The catalog and schema names are passed as bound parameters (:name placeholders)
# rather than being formatted into the SQL text, so quoting/escaping is handled
# by the database driver. Only the column that is displayed is selected.
q = text("""
SELECT table_name
FROM information_schema.tables
WHERE table_catalog = :db_name
AND table_schema = :db_table_schema;
""")

pd.read_sql(
    q,
    con=engine,
    params={"db_name": db_name, "db_table_schema": db_table_schema},
)

Unnamed: 0,table_name
0,AllTheNews21
1,BBCArticles
2,BBCSportsArticles


In [6]:
# BBCSports Dataset: read every row of public."BBCSportsArticles" into a DataFrame
q = """
SELECT *
FROM public."BBCSportsArticles";
"""

bbc_sports = pd.read_sql(sql=q, con=engine)

# Show the (rows, columns) shape first, then a preview of the leading rows
display(bbc_sports.shape, bbc_sports.head())

(737, 3)

Unnamed: 0,category,titles,contents
0,athletics,Claxton hunting first major medal,British hurdler Sarah Claxton is confident she...
1,athletics,O'Sullivan could run in Worlds,Sonia O'Sullivan has indicated that she would ...
2,athletics,Greene sets sights on world title,Maurice Greene aims to wipe out the pain of lo...
3,athletics,IAAF launches fight against drugs,The IAAF - athletics' world governing body - h...
4,athletics,"Dibaba breaks 5,000m world record",Ethiopia's Tirunesh Dibaba set a new world rec...


In [7]:
# BBC Dataset: read every row of public."BBCArticles" into a DataFrame
q = """
SELECT *
FROM public."BBCArticles";
"""

bbc = pd.read_sql(sql=q, con=engine)

# Show the (rows, columns) shape first, then a preview of the leading rows
display(bbc.shape, bbc.head())

(2225, 3)

Unnamed: 0,category,titles,contents
0,business,Ad sales boost Time Warner profit,Quarterly profits at US media giant TimeWarner...
1,business,Dollar gains on Greenspan speech,The dollar has hit its highest level against t...
2,business,Yukos unit buyer faces loan claim,The owners of embattled Russian oil giant Yuko...
3,business,High fuel prices hit BA's profits,British Airways has blamed high fuel prices fo...
4,business,Share boost for feud-hit Reliance,The board of Indian conglomerate Reliance has ...


In [8]:
# Export BBCSports to CSV
bbc_sports_csv_path = "../clean-datasets/exported-from-db/bbc_sports.csv"

# to_csv does not create missing parent folders, so make sure the target
# directory exists before writing (no-op when it is already there).
os.makedirs(os.path.dirname(bbc_sports_csv_path), exist_ok=True)

# NOTE: with the default index=True the DataFrame index is written as an
# unnamed first column; kept as-is so existing readers of the file still work.
bbc_sports.to_csv(bbc_sports_csv_path)

In [9]:
# Export BBC to CSV
bbc_csv_path = "../clean-datasets/exported-from-db/bbc.csv"

# to_csv does not create missing parent folders, so make sure the target
# directory exists before writing (no-op when it is already there).
os.makedirs(os.path.dirname(bbc_csv_path), exist_ok=True)

# NOTE: with the default index=True the DataFrame index is written as an
# unnamed first column; kept as-is so existing readers of the file still work.
bbc.to_csv(bbc_csv_path)