In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from functools import reduce
from difflib import SequenceMatcher
from math import isclose
import pandas as pd
import numpy as np

def create_ratio_name(x, key, ratio_label="ratio_name"):
    x[ratio_label] = SequenceMatcher(None, x.name.value, key).ratio()
    return x

def create_ratio_date(x, key, ratio_label="ratio_date"):
    try:
        year = int(x.yearpublished.value.strip())
    except:
        year = 0
    x[ratio_label] = year/key
    if x[ratio_label] > 1:
        x[ratio_label] = 1/x[ratio_label]
    return x

def sort_by_closest(l, key, create_ratio_field=create_ratio_name):
    l = list(map(lambda x: create_ratio_field(x, key), l))
    l.sort(reverse=True,key=lambda x: x.ratio_name)
    return l

def pick_closest(x, y):
    ratio_name = "ratio_name"
    ratio_date = "ratio_date"
    
    if x[ratio_name] > y[ratio_name]:
        return x

    if isclose(x[ratio_name], y[ratio_name]):
        return x if x[ratio_date] > y[ratio_date] else y

    return y

def get_closest_match(name, date):
    l = conn.search(name)
    hits = int(l["items"].total)
    
    if hits == 0:
        return {'id': "not_found"}
    elif hits > 1:
        l = l['items'].item
        l = map(lambda x: create_ratio_name(x, name), l)
        l = map(lambda x: create_ratio_date(x, date), l)
        match = reduce(pick_closest, l)
        return match

    return l['items'].item


In [3]:
import json
from libbgg.apiv1 import BGG
# You can also use version 2 of the api:
from libbgg.apiv2 import BGG as BGG2

conn = BGG2()
game_name = "7 wonders"
realease_date = 2020
# conn.search(game_name)
# get_closest_match(game_name, realease_date)


In [4]:
import Ludopedia
from tqdm import tqdm

collection_name = 'scaroni'
game_type = "base"
parser = Ludopedia.CollectionParser()
parser.fetch_collection(collection_name, tipo='colecao', tipo_jogo=game_type)    
gameParser = Ludopedia.GamePageParser()

def get_bgg_info(g):
    game_title = g['title'].replace(':', '').replace('!', '')
    results = conn.search(game_title)
    if "boardgame" not in results.boardgames:
        g['bgg_id'] = "not found"
    elif type(results.boardgames.boardgame) is list:
        g['bgg_id'] = sort_by_closest(results.boardgames.boardgame, game_title)[0]['objectid']
    else:
        g['bgg_id'] = results.boardgames.boardgame.objectid
    return g

def get_ludopedia_info(g):
    info = gameParser.parse_game_page(g['link'])
    for i in info:
        g[i] = info[i]
    
    return g

def fetch_all_info(g):
    g = get_ludopedia_info(g)
    game_title = g['title'].replace(':', '').replace('!', '')
    g['bgg_id'] = get_closest_match(game_title, g['release_date'])['id']
    
    return g

# collection = list(map(get_bgg_info, tqdm(parser.collection)))
# collection = list(map(get_ludopedia_info, tqdm(collection)))
collection = list(map(fetch_all_info, tqdm(parser.collection)))
collection = pd.DataFrame(collection)

100%|██████████| 61/61 [03:06<00:00,  3.05s/it]


In [5]:
characteristics = pd.read_csv("csvs/game_escolheitor.csv")
characteristics.columns = ['title', 'players', 'experience', 'interaction', 'weight', "learning_curve", "duration"]
prices = pd.read_csv("csvs/precos.csv")
prices = prices[prices.columns[:-3]]
prices.columns = ['title', 'sleeved', 'how_many_cards', 'money_spent', 'obs']

collection_data = collection.join(characteristics.set_index('title'), on='title')
collection_data = collection_data.join(prices.set_index('title'), on='title')

collection_data.rating = collection_data.rating.apply(lambda x: np.NaN if x == "S/N" else x).astype(float)
collection_data[collection_data["bgg_id"] == '177590']

Unnamed: 0,link,title,owned,release_date,has,had,bgg_id,rating,description,players,experience,interaction,weight,learning_curve,duration,sleeved,how_many_cards,money_spent,obs
0,https://www.ludopedia.com.br/jogo/13-days-the-...,13 Days: The Cuban Missile Crisis,True,2015,35,4,177590,,,2,tática,indireta,leve,1 partida,30-45 min,Sim,,70.0,sleeves padrão


In [6]:
collection_data["money_spent"] = collection_data["money_spent"].apply(lambda x: np.NaN if x == 0 else x)

collection_data.describe()

Unnamed: 0,release_date,has,had,rating,money_spent
count,61.0,61.0,61.0,49.0,40.0
mean,2011.459016,899.770492,115.459016,7.75102,161.125
std,8.692092,923.216901,127.724909,1.00522,144.769877
min,1980.0,0.0,0.0,5.0,20.0
25%,2010.0,189.0,20.0,7.0,60.0
50%,2015.0,600.0,68.0,8.0,120.0
75%,2017.0,1268.0,173.0,8.5,196.25
max,2020.0,4246.0,502.0,10.0,640.0


In [8]:
collection_data.to_csv("collection_full_info.csv")

In [7]:
import boto3

dynamo_access = pd.read_csv("dynamo.csv")

db = boto3.resource(
    'dynamodb',
    aws_access_key_id=dynamo_access['Access key ID'],
    aws_secret_access_key=dynamo_access['Secret access key'],
    region_name='sa-east-1')

t = db.Table("boardgames")
for g in collection_data.to_dict("records"):
    t.put_item(Item=g)

TypeError: Float types are not supported. Use Decimal types instead.