In [9]:
import pandas as pd
import boto3
from dotenv import load_dotenv
import os
import io
# Load settings via python-dotenv (typically from a local .env file) so the
# os.environ lookups below can see them.
load_dotenv()

# Connection settings are taken from the environment rather than hardcoded.
# Each of these three may be unset: os.environ.get() then yields None, and
# that None is handed to the client constructor unchanged.
AWS_ACCESS_KEY_ID = os.environ.get('AWS_ACCESS_KEY_ID')
AWS_SECRET_ACCESS_KEY = os.environ.get('AWS_SECRET_ACCESS_KEY')
S3_SERVICE_NAME = 's3'
S3_ENDPOINT_URL = os.environ.get('S3_ENDPOINT_URL')

# Every S3 call in this notebook takes the bucket name, so fail right here
# with a clear message instead of letting a None bucket surface later as an
# unrelated-looking error inside the first boto3 call.
BUCKET_NAME = os.environ.get('S3_BUCKET_NAME')
if not BUCKET_NAME:
    raise RuntimeError(
        "S3_BUCKET_NAME is not set; define it in the environment or in .env"
    )

session = boto3.session.Session()

# Single shared client used by all upload / list / download cells below.
s3 = session.client(
    service_name=S3_SERVICE_NAME,
    endpoint_url=S3_ENDPOINT_URL,
    aws_access_key_id=AWS_ACCESS_KEY_ID,
    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
)

# Destination object keys inside the bucket, one per artifact. The names stay
# at module level because later cells read objects back through them.
# NOTE(review): every key ends with '/'; some S3 tools display such keys as
# folders rather than objects - confirm this naming is intentional.
items_parquet = 'recsys/data/items_parquet/'
personal_als_parquet = 'recsys/recommendations/personal_als_parquet/'
top_popular_parquet = 'recsys/recommendations/top_popular_parquet/'
recommendations_parquet = 'recsys/recommendations/recommendations_parquet/'
als_model_npz = 'recsys/model/als_model_npz/'

# (local file, destination key) pairs, uploaded in the order listed.
uploads = [
    ('items.parquet', items_parquet),
    ('personal_als.parquet', personal_als_parquet),
    ('top_popular.parquet', top_popular_parquet),
    ('recommendations.parquet', recommendations_parquet),
    ('als_model.npz', als_model_npz),
]

for local_path, object_key in uploads:
    s3.upload_file(local_path, BUCKET_NAME, object_key)

In [12]:
# Print the key of every object in the bucket.
# A single list_objects call returns only one page of results, and its
# response has no 'Contents' entry when nothing matches; paginating and
# defaulting to an empty list covers both large and empty buckets.
for page in s3.get_paginator('list_objects').paginate(Bucket=BUCKET_NAME):
    for s3_object in page.get('Contents', []):
        print(s3_object["Key"])

0/04aa86233c4f4d9e8124496e88bff726/artifacts/cv/MLmodel
0/04aa86233c4f4d9e8124496e88bff726/artifacts/cv/conda.yaml
0/04aa86233c4f4d9e8124496e88bff726/artifacts/cv/input_example.json
0/04aa86233c4f4d9e8124496e88bff726/artifacts/cv/model.cb
0/04aa86233c4f4d9e8124496e88bff726/artifacts/cv/model.pkl
0/04aa86233c4f4d9e8124496e88bff726/artifacts/cv/python_env.yaml
0/04aa86233c4f4d9e8124496e88bff726/artifacts/cv/requirements.txt
0/234ed0b87c30488a8f4c4f0e1cc91396/artifacts/cv/MLmodel
0/234ed0b87c30488a8f4c4f0e1cc91396/artifacts/cv/conda.yaml
0/234ed0b87c30488a8f4c4f0e1cc91396/artifacts/cv/model.pkl
0/234ed0b87c30488a8f4c4f0e1cc91396/artifacts/cv/python_env.yaml
0/234ed0b87c30488a8f4c4f0e1cc91396/artifacts/cv/requirements.txt
0/4128f76bca3b4f8ca454d3a1eb3596c1/artifacts/cv/MLmodel
0/4128f76bca3b4f8ca454d3a1eb3596c1/artifacts/cv/conda.yaml
0/4128f76bca3b4f8ca454d3a1eb3596c1/artifacts/cv/input_example.json
0/4128f76bca3b4f8ca454d3a1eb3596c1/artifacts/cv/model.cb
0/4128f76bca3b4f8ca454d3a1eb3596c

In [15]:
# Sanity check: download the top-popular recommendations that were uploaded
# above and parse the parquet payload from memory.
obj = s3.get_object(Bucket=BUCKET_NAME, Key=top_popular_parquet)
parquet_bytes = obj['Body'].read()  # whole object body as raw bytes
df = pd.read_parquet(io.BytesIO(parquet_bytes))
df

Unnamed: 0,track_id,users,track_seq_median,avg_rating,popularity_weighted,track_name,album_name,artist_name,genre_name
0,47627256,67785,115.0,0.92,7.17,[Cradles],[Cradles],[Sub Urban],[electronics]
1,51516485,64328,124.0,0.87,6.94,[bad guy],"[Halloween Pop, Summer Aux, The Best Summer Al...",[Billie Eilish],[indie]
2,24692821,81719,69.0,1.11,6.26,[Way Down We Go],"[Chilled Acoustic, Summer Music 2017, A/B, Ant...",[KALEO],[indie]
3,32947997,77256,69.0,1.05,5.60,[Shape of You],"[÷, Shape of You]",[Ed Sheeran],[pop]
4,55561798,52338,149.0,0.71,5.54,[Аugust],[Renovatio],[Intelligency],[electronics]
...,...,...,...,...,...,...,...,...,...
95,672687,33514,73.0,0.46,1.13,[Toxicity],"[Rock Clássico, Gym Energy, Toxicity]",[System of A Down],[metal]
96,73538338,23265,150.0,0.32,1.12,[gorit],[gorit],[DOROFEEVA],"[pop, ruspop]"
97,63589631,19297,220.0,0.26,1.10,[Меня не будет],[Меня не будет],"[ANIKV, SALUKI]","[rusrap, rap]"
98,78814159,17389,264.0,0.24,1.10,[ZITTI E BUONI],"[Festival di Sanremo - le canzoni più belle, Z...",[Måneskin],[allrock]


In [38]:
# Sanity check: download the serialized ALS model that was uploaded above,
# restore it, and run one similarity query against it.
from implicit.als import AlternatingLeastSquares

# NOTE(review): load() returns a model object that replaces this instance, so
# these hyperparameters are presumably superseded by whatever the file holds -
# confirm against the implicit documentation.
als_params = dict(factors=50, iterations=50, regularization=0.05, random_state=0)
als_model = AlternatingLeastSquares(**als_params)

obj = s3.get_object(Bucket=BUCKET_NAME, Key=als_model_npz)
model_buffer = io.BytesIO(obj['Body'].read())  # in-memory file-like view of the object
als_model = als_model.load(model_buffer)

# Items most similar to item index 1.
als_model.similar_items(1)

(array([     1, 313165, 276953,  46509, 125714, 150719,  37242, 206747,
        140493, 329881], dtype=int32),
 array([1.        , 0.9998784 , 0.9998726 , 0.99983907, 0.9998212 ,
        0.9997774 , 0.99972606, 0.99972266, 0.9997163 , 0.9997102 ],
       dtype=float32))