In [44]:
import sys
import os
import sqlite3
import textwrap
import pandas as pd

In [45]:
from src.config import DB_PATH
sys.path.append(os.path.abspath('..'))
print(f"Database found at: {DB_PATH}")

Database found at: /home/hans/projects/journalist_dashboard/data/yle_data.db


In [46]:
conn = sqlite3.connect(DB_PATH)
cursor = conn.cursor()

In [47]:
cursor.execute("SELECT COUNT(*) FROM articles")
total_articles = cursor.fetchone()[0]

cursor.execute("SELECT COUNT(*) FROM articles WHERE content IS NOT NULL")
articles_with_content = cursor.fetchone()[0]

print(f"Total Articles: {total_articles}")
print(f"Articles with Content: {articles_with_content}")

Total Articles: 123
Articles with Content: 123


In [48]:
query = "SELECT name FROM sqlite_master WHERE type='table';"
tables = pd.read_sql(query, conn)

print(tables)

          name
0  journalists
1     articles


Let's check the structure of the 'articles' table

In [49]:
df_structure = pd.read_sql("PRAGMA table_info(articles);", conn)
print(df_structure[['name', 'type', 'pk']])

             name  type  pk
0              id  TEXT   1
1           title  TEXT   0
2             url  TEXT   0
3  published_date  TEXT   0
4         content  TEXT   0
5   journalist_id  TEXT   0
6     description  TEXT   0
7        keywords  TEXT   0


Check the structure of 'journalists' table

In [50]:
df_structure = pd.read_sql("PRAGMA table_info(journalists);", conn)
print(df_structure[['name', 'type', 'pk']])

          name  type  pk
0           id  TEXT   1
1         name  TEXT   0
2  profile_url  TEXT   0


Let's check some articles

In [51]:
df_preview = pd.read_sql("SELECT * FROM articles LIMIT 123;", conn)
print(df_preview)

              id                                              title  \
0    74-20178275  Gielddasápmelaš aktivista Andrei Danilov oačču...   
1    74-20161005  Rikostutkinta vuoden 2023 saamelaiskäräjävaale...   
2    74-20161003  Rihkusdutkan jagi 2023 sámediggeválggain loahp...   
3    74-20155722  Guovddášbellodat válgavuoiti sámegielddain – l...   
4    74-20154955  Sohkaršohkka sádde njuolggosáddaga Sámi Grand ...   
..           ...                                                ...   
118  74-20069618  Analyysi: Trump jyräsi Iowassa – mutta onko hi...   
119  74-20065911  Arizona kuivuu ja maanviljely vaikeutuu, mutta...   
120  74-20065901  Yhdysvaltain osavaltio teki päivähoidosta ilma...   
121  74-20065171  USA:n kongressi hyväksyi lain, joka estää pres...   
122  74-20064743  Analyysi: Vääntö Ukrainan-tuesta on käännekoht...   

                              url published_date  \
0    https://yle.fi/a/74-20178275           None   
1    https://yle.fi/a/74-20161005          

In [53]:
df_preview = pd.read_sql("SELECT * FROM journalists LIMIT 123;", conn)
print(df_preview)

           id               name                     profile_url
0  56-74-1051         Iida Tikka  https://yle.fi/p/56-74-1051/fi
1  56-74-1533  Hans Ravna-Pieski  https://yle.fi/p/56-74-1533/fi


In [34]:
cursor.execute("SELECT title, url, content FROM articles ORDER BY RANDOM() LIMIT 3")
rows = cursor.fetchall()

for i, row in enumerate(rows):
    title = row[0]
    url = row[1]
    content = row[2]
    
    print(f"Article #{i+1}")
    print(f"Title:   {title}")
    print(f"URL:     {url}")
    
    if content:
        preview = textwrap.shorten(content, width=500, placeholder="...")
        print(f"Content:\n{content}")
    else:
        print("Content: [EMPTY]")
        
    print("-" * 67)

Article #1
Title:   Sámemusea Siiddas sávvet nammadeami jagi eurohpalaš musean bidjat fártta sámedávviriid máhcaheapmái máilmmi museain
URL:     https://yle.fi/a/74-20087333
Content:
Sámemusea Siida Anáris lea nammaduvvon Jagi eurohpalaš musean 2024. Vuoiti bálkkašuvvui lávvardaga European Museum of the Year Award -konferánssas Portugala Portimãos. Bálkkašumis gilvaledje 50 musea 24 riikkas.

Bálkkašumi leigga Portugalis vuostáiváldimin museahoavda Taina Pieski ja intendeanta Eija Ojanlatva.

– Mii háliidit giitit olles Sámi servodaga dán bálkkašumi ovddas, dajai Pieski bálkkašumi oaččodettiin.

Bálkkašupmái nammaduvvojit ođđa ja ođasmahtton museat. Sámemusea Siiddas rahpe ođđa váldočájáhusa geassit 2022.

Bálkkašupmi mieđihuvvui searvvušlašvuođas ja kulturárbbi máhcaheapmái laktáseaddji barggus. Álbmotmusea máhcahii badjel 2 200 sámedávvira ruovttoluotta Sápmái sámemusea Siidii čakčat 2021.

– Mii válddiimet badjel 300 sápmelačča mielde bargui, mii laktásii min máttarváhnemiid dávviri

Everything as expected!

In [35]:
conn.close()