## ２つのCSVから差異を決める


In [5]:
import requests
import json
import pandas as pd
import boto3
from datetime import datetime, date, timedelta
import os
from io import StringIO


# Steam Web API request template.
# The API key and the Steam user ID are read from environment variables;
# a missing variable raises KeyError right here.
api_key = os.environ['steamapikey']
user_key = os.environ['steamuserkey']
# HTTPS on purpose: the API key is part of the query string, so the request
# must not be sent in clear text.
url = (
    "https://api.steampowered.com/IPlayerService/GetOwnedGames/v0001/"
    "?key={}&steamid={}&format=json&include_appinfo=1"
).format(api_key, user_key)


# S3 credentials are supplied through environment variables.
S3_accesskey = os.environ['s3accesskey']
S3_secretkey = os.environ['s3secretkey']


# S3 bucket that the helper functions read from and write to.
Bucket_profile = 'buc-steamapi'

In [15]:
def get_csv_from_s3_as_pd_dataframe(s3_file_key):
    """Download a CSV object from the project bucket and parse it.

    Args:
        s3_file_key: Key of the CSV object inside ``Bucket_profile``.

    Returns:
        The object's contents, decoded as UTF-8 and parsed by
        ``pandas.read_csv`` with default options.
    """
    client = boto3.client(
        's3',
        aws_access_key_id=S3_accesskey,
        aws_secret_access_key=S3_secretkey,
        region_name='ap-northeast-1',
    )
    response = client.get_object(Bucket=Bucket_profile, Key=s3_file_key)

    # Read the whole payload into memory, then hand pandas a text buffer.
    raw_bytes = response['Body'].read()
    return pd.read_csv(StringIO(raw_bytes.decode('utf-8')))

In [16]:
# Reference timestamp for this run, plus the timestamps exactly one day
# before and after it. Later cells use these to build the S3 object keys.
today = datetime.today()
print(today.strftime('%Y-%m-%d'))

one_day = timedelta(days=1)
tomorrow = today + one_day
yesterday = today - one_day

print("tomorrow -> " + tomorrow.strftime('%Y-%m-%d'))
print("yesterday -> " + yesterday.strftime('%Y-%m-%d'))


# Bare expression so the notebook cell displays yesterday's date string.
yesterday.strftime('%Y-%m-%d')

2019-05-24
tomorrow -> 2019-05-25
yesterday -> 2019-05-23


'2019-05-23'

In [17]:
# Load today's snapshot from S3 and keep only the columns used for the
# day-over-day comparison.
today_key = 'dailydata/df_{}.csv'.format(today.strftime('%Y-%m-%d'))
df_today = get_csv_from_s3_as_pd_dataframe(today_key)[
    ["appid", "name", "playtime_forever", "day"]
]

In [29]:
# Load yesterday's snapshot from S3 and keep only the columns used for the
# day-over-day comparison.
yesterday_key = 'dailydata/df_{}.csv'.format(yesterday.strftime('%Y-%m-%d'))
df_yesterday = get_csv_from_s3_as_pd_dataframe(yesterday_key)[
    ["appid", "name", "playtime_forever", "day"]
]

In [39]:
# Pair every game in today's snapshot with its row from yesterday's snapshot
# (left join on appid). Yesterday's overlapping columns get the "_y" suffix.
df_twodays = df_today.merge(df_yesterday, how="left", on="appid", suffixes=('', '_y'))
# A game that is missing from yesterday's snapshot has NaN in
# playtime_forever_y after the left join. Use 0 as its baseline so that
# playtime_daily is today's full playtime instead of NaN.
df_twodays["playtime_daily"] = (
    df_twodays["playtime_forever"] - df_twodays["playtime_forever_y"].fillna(0)
)

In [42]:
# Keep only the rows whose cumulative playtime today differs from the value
# joined in from yesterday's snapshot.
played_today = df_twodays["playtime_forever"]
played_yesterday = df_twodays["playtime_forever_y"]
df_lomited_twodays = df_twodays[played_today != played_yesterday]

In [49]:
# Project down to the columns that go into the uploaded daily report.
report_columns = ["appid", "name", "day", "playtime_daily"]
df_lomited_twodays = df_lomited_twodays[report_columns]

In [53]:
def upload_df_to_s3_as_csv(df, s3_file_key='processed_data/df_daily.csv'):
    """Serialize *df* to CSV and upload it to the project S3 bucket.

    Args:
        df: DataFrame to upload. It is written without the index column.
        s3_file_key: Destination object key inside ``Bucket_profile``.
            Defaults to ``'processed_data/df_daily.csv'``, the key this
            function previously always wrote to, so existing callers are
            unaffected.
    """
    s3 = boto3.resource(
        's3',
        aws_access_key_id=S3_accesskey,
        aws_secret_access_key=S3_secretkey,
        region_name='ap-northeast-1',
    )

    csv_buffer = StringIO()
    df.to_csv(csv_buffer, index=False)

    # Encode explicitly as UTF-8 so the stored bytes match the UTF-8 decode
    # performed by get_csv_from_s3_as_pd_dataframe when reading CSVs back.
    payload = csv_buffer.getvalue().encode('utf-8')
    s3.Object(Bucket_profile, s3_file_key).put(Body=payload)

In [55]:
# Publish the filtered day-over-day table to S3.
upload_df_to_s3_as_csv(df=df_lomited_twodays)