In [57]:
import sys
import os
import sqlite3
import textwrap
import pandas as pd

In [58]:
from src.config import DB_PATH
sys.path.append(os.path.abspath('..'))
print(f"Database found at: {DB_PATH}")

Database found at: /home/hans/projects/journalist_dashboard/data/yle_data.db


In [59]:
conn = sqlite3.connect(DB_PATH)
cursor = conn.cursor()

In [60]:
cursor.execute("SELECT COUNT(*) FROM articles")
total_articles = cursor.fetchone()[0]

cursor.execute("SELECT COUNT(*) FROM articles WHERE content IS NOT NULL")
articles_with_content = cursor.fetchone()[0]

print(f"Total Articles: {total_articles}")
print(f"Articles with Content: {articles_with_content}")

Total Articles: 71
Articles with Content: 71


In [61]:
query = "SELECT name FROM sqlite_master WHERE type='table';"
tables = pd.read_sql(query, conn)

print(tables)

          name
0  journalists
1     articles


Let's check the structure of the 'articles' table

In [62]:
df_structure = pd.read_sql("PRAGMA table_info(articles);", conn)
print(df_structure[['name', 'type', 'pk']])

             name  type  pk
0              id  TEXT   1
1           title  TEXT   0
2             url  TEXT   0
3  published_date  TEXT   0
4         content  TEXT   0
5     description  TEXT   0
6        keywords  TEXT   0
7   journalist_id  TEXT   0


Check the structure of 'journalists' table

In [63]:
df_structure = pd.read_sql("PRAGMA table_info(journalists);", conn)
print(df_structure[['name', 'type', 'pk']])

          name  type  pk
0           id  TEXT   1
1         name  TEXT   0
2  profile_url  TEXT   0


Let's check some articles

In [64]:
df_articles_preview = pd.read_sql("SELECT * FROM articles LIMIT 123;", conn)
print(df_articles_preview)

             id                                              title  \
0   74-20203401  Buollašat váikkuhit sakka maid dáluide – ná ba...   
1   74-20105252  Poikkeuksellinen ilmiö: Rotat valtasivat kylän...   
2   74-20105268  Jurssánis juŋkkát ráfehuhttet giliolbmuid Dean...   
3   74-20103823  Ohcejohka lea miestaluvvan šibihiid váilevuođa...   
4   74-20104142  Ohcejotgeainnu bajildanbargu lea farga gárvvis...   
..          ...                                                ...   
66  74-20178463  Tytti Tuppurainen Ylelle: Käsitykseni mukaan P...   
67  74-20177951  Hollolan koulupalo voi jäädä kunnan ja vakuutu...   
68  74-20177502  Elinvoimajohtaja: Lahteen nouseva datakeskusho...   
69  74-20174646  Ulosteen löyhkä piinaa asukkaita Lahden satama...   
70  74-20173534  Laulujoutsen löytyi kaulastaan siltaan sidottu...   

                             url             published_date  \
0   https://yle.fi/a/74-20203401  2026-01-09T17:00:40+02:00   
1   https://yle.fi/a/74-20105252 

In [None]:
df_journalists_preview = pd.read_sql("SELECT * FROM journalists LIMIT 123;", conn)
print(df_journalists_preview)

           id               name                     profile_url
0  56-74-1051         Iida Tikka  https://yle.fi/p/56-74-1051/fi
1  56-74-1533  Hans Ravna-Pieski  https://yle.fi/p/56-74-1533/fi


In [34]:
cursor.execute("SELECT title, url, content FROM articles ORDER BY RANDOM() LIMIT 3")
rows = cursor.fetchall()

for i, row in enumerate(rows):
    title = row[0]
    url = row[1]
    content = row[2]
    
    print(f"Article #{i+1}")
    print(f"Title:   {title}")
    print(f"URL:     {url}")
    
    if content:
        preview = textwrap.shorten(content, width=500, placeholder="...")
        print(f"Content:\n{content}")
    else:
        print("Content: [EMPTY]")
        
    print("-" * 67)

Article #1
Title:   Sámemusea Siiddas sávvet nammadeami jagi eurohpalaš musean bidjat fártta sámedávviriid máhcaheapmái máilmmi museain
URL:     https://yle.fi/a/74-20087333
Content:
Sámemusea Siida Anáris lea nammaduvvon Jagi eurohpalaš musean 2024. Vuoiti bálkkašuvvui lávvardaga European Museum of the Year Award -konferánssas Portugala Portimãos. Bálkkašumis gilvaledje 50 musea 24 riikkas.

Bálkkašumi leigga Portugalis vuostáiváldimin museahoavda Taina Pieski ja intendeanta Eija Ojanlatva.

– Mii háliidit giitit olles Sámi servodaga dán bálkkašumi ovddas, dajai Pieski bálkkašumi oaččodettiin.

Bálkkašupmái nammaduvvojit ođđa ja ođasmahtton museat. Sámemusea Siiddas rahpe ođđa váldočájáhusa geassit 2022.

Bálkkašupmi mieđihuvvui searvvušlašvuođas ja kulturárbbi máhcaheapmái laktáseaddji barggus. Álbmotmusea máhcahii badjel 2 200 sámedávvira ruovttoluotta Sápmái sámemusea Siidii čakčat 2021.

– Mii válddiimet badjel 300 sápmelačča mielde bargui, mii laktásii min máttarváhnemiid dávviri

Everything as expected!

In [35]:
conn.close()