# Fetching data using `SqlFetcher`
This notebook demonstrates translating IDs using data fetched from a SQL database. It assumes that the ***Prepare for `SqlFetcher` demo*** step from `PickleFetcher.ipynb` has been completed.

In [1]:
import sys

import rics

# Print relevant versions: the installed rics version, the Python interpreter
# version, and (via the shell) the most recent git commit as an abbreviated
# one-line summary, so the outputs below can be tied to a specific code state.
print(f"{rics.__version__=}")
print(f"{sys.version=}")
!git log --pretty=oneline --abbrev-commit -1

rics.__version__='0.5.0'
sys.version='3.8.10 (default, Mar 15 2022, 12:22:08) \n[GCC 9.4.0]'
[33m405e481[m[33m ([m[1;36mHEAD -> [m[1;32mmain[m[33m, [m[1;31mprivate/main[m[33m)[m WIP: Toml


In [2]:
from rics.utility.logs import basic_config, logging

# Configure logging: INFO as the general level and DEBUG as rics_level
# (presumably applied to the rics loggers, whose DEBUG messages show up in the
# outputs further down - confirm against rics.utility.logs.basic_config).
basic_config(level=logging.INFO, rics_level=logging.DEBUG)

## Make some data to translate

In [3]:
import pandas as pd
import sqlalchemy

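# The postgresql:// connection used below needs a PostgreSQL driver (psycopg2).
# If it is missing, uncomment and run the following to install it:
# !sudo apt-get build-dep python-psycopg2
# !pip install psycopg2-binary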

In [4]:
import os

# Connection URL for the local IMDb demo database. Set the IMDB_CONNECTION_STRING
# environment variable to supply real credentials without writing them into the
# notebook; when it is unset, the original placeholder URL is used unchanged.
# NOTE(review): the Translator created further down is configured separately via
# config.toml - presumably with its own connection settings; keep them in sync.
connection_string = os.environ.get(
    "IMDB_CONNECTION_STRING",
    "postgresql://postgres:your_password@localhost:5432/imdb",
)
# Engine used by first_title() to query the name_basics table.
engine = sqlalchemy.create_engine(connection_string)


def first_title(seed=None, n=1000):
    """Sample rows from ``name_basics`` and extract each row's first known-for title.

    Args:
        seed: Forwarded to ``DataFrame.sample`` as ``random_state``; pass a fixed
            value to get the same sample on every call.
        n: Number of rows to sample.

    Returns:
        A DataFrame with the columns ``nconst`` and ``firstTitle``, where
        ``firstTitle`` is the first comma-separated entry of ``knownForTitles``.
    """
    # The whole table is read first; sampling happens in pandas, not in SQL.
    everyone = pd.read_sql("SELECT * FROM name_basics;", engine)
    sampled = everyone.sample(n, random_state=seed)

    known_for = sampled["knownForTitles"].str.split(",")
    sampled["firstTitle"] = known_for.str.get(0)

    wanted_columns = ["nconst", "firstTitle"]
    return sampled[wanted_columns]

## Create translator from config

In [5]:
from rics.translation import Translator

# Build the translator from the TOML config. The first print shows it in online
# mode (online=True), backed by a SqlFetcher connected to the database.
translator = Translator.from_config("config.toml")
print(translator)
# store() fetches the IDs of every table (see the log output) and keeps them as a
# TranslationMap; the second print shows the translator is now offline
# (online=False) and backed by that cache instead of the fetcher.
tmap = translator.store()
print(translator)

2022-06-15T21:04:19.278 [rics.translation.fetching.SqlFetcher:DEBUG] Metadata created in 0.0779041 sec.
2022-06-15T21:04:19.393 [rics.translation.fetching.SqlFetcher:DEBUG] Size of name_basics=163559 resolved in 0.114662 sec.
2022-06-15T21:04:19.401 [rics.translation.fetching.SqlFetcher:DEBUG] Size of title_basics=42074 resolved in 0.00708434 sec.
2022-06-15T21:04:19.403 [rics.translation.fetching.SqlFetcher:INFO] Found 2 tables in 0.203009 sec: ['name_basics', 'title_basics']
2022-06-15T21:04:19.404 [rics.translation.fetching.Fetcher:DEBUG] Overrides for source='name_basics': {'id': 'nconst', 'name': 'primaryName', 'from': 'birthYear', 'to': 'deathYear'}; unique to source.


Translator(online=True: fetcher=SqlFetcher(engine=Engine(postgresql://postgres:***@localhost:5432/imdb), tables=['name_basics', 'title_basics']))


2022-06-15T21:04:19.829 [rics.translation.fetching.Fetcher:DEBUG] Fetched ('nconst', 'primaryName', 'birthYear', 'deathYear') for 163559 IDS from 'name_basics' in 0.423676 sec.
2022-06-15T21:04:19.830 [rics.translation.fetching.Fetcher:DEBUG] Overrides for source='title_basics': {'id': 'tconst', 'name': 'primaryTitle', 'original_name': 'originalTitle', 'from': 'startYear', 'to': 'endYear'}; unique to source.
2022-06-15T21:04:20.002 [rics.translation.fetching.Fetcher:DEBUG] Fetched ('tconst', 'primaryTitle', 'originalTitle', 'startYear', 'endYear') for 42074 IDS from 'title_basics' in 0.167262 sec.
2022-06-15T21:04:20.003 [rics.translation.fetching.SqlFetcher:INFO] Deleting Engine(postgresql://postgres:***@localhost:5432/imdb)
2022-06-15T21:04:20.004 [rics.translation.Translator:INFO] Store TranslationMap('name_basics': 163559 IDs, 'title_basics': 42074 IDs)


Translator(online=False: cache=TranslationMap('name_basics': 163559 IDs, 'title_basics': 42074 IDs))


## Get the name and the "first" appearance for actors
"First" here means the first entry in the IMDb `knownForTitles` list; I have no idea how the titles in that list are ordered.

In [6]:
# Draw the sample with a fixed seed so the same rows are selected on every run,
# then preview the untranslated IDs.
df = first_title(seed=5)
df.head()

Unnamed: 0,nconst,firstTitle
5536,nm0038172,tt0063897
5882,nm0040962,tt0043440
105691,nm0865925,tt0373558
115067,nm0941259,tt5558956
126661,nm1229926,tt0329418


In [7]:
# Show the full (display-truncated) untranslated sample for comparison with the
# translated output in the next cell.
df

Unnamed: 0,nconst,firstTitle
5536,nm0038172,tt0063897
5882,nm0040962,tt0043440
105691,nm0865925,tt0373558
115067,nm0941259,tt5558956
126661,nm1229926,tt0329418
...,...,...
87266,nm0709659,tt0027778
64498,nm0513959,tt0456810
46020,nm0364288,tt0022085
24105,nm0186833,tt0072308


In [8]:
# Without inplace=True, translate() returns a translated frame (displayed here)
# and leaves df itself unchanged, as the following cell confirms.
translator.translate(df)

Unnamed: 0,nconst,firstTitle
5536,nm0038172:Peter Aryans *1918†2001,tt0063897:Floris (original: Floris) *1969†1969
5882,nm0040962:Ugo Attanasio *1887†1969,tt0043440 not translated; default name=Title u...
105691,nm0865925:Manuel Toledano *1974†2007,tt0373558:Lo + plus (original: Lo + plus) *199...
115067,nm0941259:Roberta Woolley *1938†2017,tt5558956 not translated; default name=Title u...
126661,nm1229926:Carla Hansen *1906†2001,tt0329418 not translated; default name=Title u...
...,...,...
87266,nm0709659:Pamela Randell *1918†1991,tt0027778 not translated; default name=Title u...
64498,nm0513959:Vanja Lisak *1941†2015,tt0456810:Vecernja skola: Povratak upisanih (o...
46020,nm0364288:John Harrington *1882†1945,tt0022085 not translated; default name=Title u...
24105,nm0186833:John Crawford *1920†2010,tt0072308 not translated; default name=Title u...


In [9]:
# df still holds the raw IDs: the translate() call above did not modify it.
df.head()

Unnamed: 0,nconst,firstTitle
5536,nm0038172,tt0063897
5882,nm0040962,tt0043440
105691,nm0865925,tt0373558
115067,nm0941259,tt5558956
126661,nm1229926,tt0329418


In [10]:
# With inplace=True the translations are written into df itself, so the
# head() below now shows translated values instead of raw IDs.
translator.translate(df, inplace=True)  # returns None
df.head()

Unnamed: 0,nconst,firstTitle
5536,nm0038172:Peter Aryans *1918†2001,tt0063897:Floris (original: Floris) *1969†1969
5882,nm0040962:Ugo Attanasio *1887†1969,tt0043440 not translated; default name=Title u...
105691,nm0865925:Manuel Toledano *1974†2007,tt0373558:Lo + plus (original: Lo + plus) *199...
115067,nm0941259:Roberta Woolley *1938†2017,tt5558956 not translated; default name=Title u...
126661,nm1229926:Carla Hansen *1906†2001,tt0329418 not translated; default name=Title u...
