## 実装した機能を使って実験を行う

想定としては、年齢が若い人のデータから先に手に入ったという状況。

最初に若い年齢のデータのみでトレーニングを行う。

その後、年齢が上がるにつれて精度が悪くなる様子を観測する。

最後に、全年齢のデータを使ってトレーニングを行い、結果を比較する。

In [1]:
# Import the required modules, including the newly created project-local helper.
import sys
import json
import requests
import os
import pandas as pd
# Add ../src to the module search path so that `utils` can be imported.
sys.path.append("../src")
from utils import dataframe_to_dict

# Base URL that every API request in this notebook is sent to.
host_name = "http://localhost:8081"

## データベースをリセットする

In [None]:
# Call the /reset_db endpoint so the experiment starts from a freshly reset
# database.
url = host_name + "/reset_db"
res = requests.get(url)

# Show the raw response body first so it stays visible even on failure.
print(res.content)

# Stop here on a 4xx/5xx answer; otherwise the following cells would keep
# running against a database that was not actually reset.
res.raise_for_status()

## 使用するデータを取得

In [3]:
# Download the Titanic passenger CSV straight into a DataFrame.
url = (
    "https://raw.githubusercontent.com/datasciencedojo/datasets/"
    "master/titanic.csv"
)
raw_data = pd.read_csv(url)

# Report how many rows were loaded.
print('データ数=>', len(raw_data.index))

データ数=> 891


In [4]:
# Preview the first ten rows of the loaded dataset.
raw_data.head(10)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S
5,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q
6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
7,8,0,3,"Palsson, Master. Gosta Leonard",male,2.0,3,1,349909,21.075,,S
8,9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0,0,2,347742,11.1333,,S
9,10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0,1,0,237736,30.0708,,C


### 年齢順に並べ替える

In [5]:
# Order the passengers from youngest to oldest. With pandas' default
# na_position='last', rows whose Age is missing are placed at the end.
raw_data = raw_data.sort_values(by='Age')
# Preview the ten youngest passengers.
raw_data.head(10)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
803,804,1,3,"Thomas, Master. Assad Alexander",male,0.42,0,1,2625,8.5167,,C
755,756,1,2,"Hamalainen, Master. Viljo",male,0.67,1,1,250649,14.5,,S
644,645,1,3,"Baclini, Miss. Eugenie",female,0.75,2,1,2666,19.2583,,C
469,470,1,3,"Baclini, Miss. Helene Barbara",female,0.75,2,1,2666,19.2583,,C
78,79,1,2,"Caldwell, Master. Alden Gates",male,0.83,0,2,248738,29.0,,S
831,832,1,2,"Richards, Master. George Sibley",male,0.83,1,1,29106,18.75,,S
305,306,1,1,"Allison, Master. Hudson Trevor",male,0.92,1,2,113781,151.55,C22 C26,S
827,828,1,2,"Mallet, Master. Andre",male,1.0,0,2,S.C./PARIS 2079,37.0042,,C
381,382,1,3,"Nakid, Miss. Maria (""Mary"")",female,1.0,0,2,2653,15.7417,,C
164,165,0,3,"Panula, Master. Eino Viljami",male,1.0,4,1,3101295,39.6875,,S


In [6]:
# Look at positions 400-404 of the age-sorted data (positional, not by label).
raw_data.iloc[400:405]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
726,727,1,2,"Renouf, Mrs. Peter Henry (Lillian Jefferys)",female,30.0,3,0,31027,21.0,,S
244,245,0,3,"Attalah, Mr. Sleiman",male,30.0,0,0,2694,7.225,,C
488,489,0,3,"Somerton, Mr. Francis William",male,30.0,0,0,A.5. 18509,8.05,,S
257,258,1,1,"Cherry, Miss. Gladys",female,30.0,0,0,110152,86.5,B77,S
178,179,0,2,"Hale, Mr. Reginald",male,30.0,0,0,250653,13.0,,S


## データベースにデータの登録を行う

全データを年齢の若い順にデータベースへ登録する。最初のトレーニングには、若い順に４００人（だいたい30歳くらいまで）のデータを使う。

In [7]:
# Convert the age-sorted rows into a list of dicts for the API. The slice end
# of 1000 exceeds the 891 rows loaded above, so every row is included.
# NOTE(review): mode='sex_to_num' presumably encodes Sex as a number — confirm
# against utils.dataframe_to_dict.
result_dict = dataframe_to_dict(raw_data[0:1000],mode='sex_to_num')

# Register the converted data in the database.
url = host_name + "/upload_data"
res = requests.post(url, json=result_dict)

# Show the raw response body first so it stays visible even on failure.
print(res.content)

# Stop here on a 4xx/5xx answer; otherwise the later index-based cells would
# run against a database that does not contain this upload.
res.raise_for_status()

b'{"transaction_result":"success"}'


In [8]:
# Fetch a range of the stored rows back from the database.
# NOTE(review): in the recorded output below, data_id 401 carries the values
# of raw_data position 400, so the DB index looks 1-based — confirm.
url = f"{host_name}/get_data"
# Index range of the rows to fetch.
data_index = {'start_index': 400, 'end_index': 405}
res = requests.post(url, json=data_index)

# The response body is bytes: turn it into text, then let json.loads() parse
# that text into Python objects before building a DataFrame from them.
data_list = json.loads(str(res.content, encoding='utf-8'))
df = pd.DataFrame(data_list)
df

Unnamed: 0,data_id,survived,sex,fare,pclass,upload_date,age
0,400,0,0,8.05,3,2024-05-19T18:53:55,30
1,401,1,1,21.0,2,2024-05-19T18:53:55,30
2,402,0,0,7.225,3,2024-05-19T18:53:55,30
3,403,0,0,8.05,3,2024-05-19T18:53:55,30
4,404,1,1,86.5,1,2024-05-19T18:53:55,30


## 若年層のデータのみでモデルの作成

In [9]:
# Train a new model named 'young_age_model' on index range 0-400 only.
url = f"{host_name}/train_new_model"
# Index range used for training, plus the name to store the model under.
data = {
    'start_index': 0,
    'end_index': 400,
    'my_model_name': 'young_age_model',
}
res = requests.post(url, json=data)

# Decode the response bytes and parse the JSON result.
data_list = json.loads(str(res.content, encoding='utf-8'))
print(data_list)

{'transaction_result': 'success'}


## 上の年齢のデータも入れてモデルを作成

In [10]:
# Train a second model named 'all_age_model' on index range 0-1000.
url = f"{host_name}/train_new_model"
# Index range used for training, plus the name to store the model under.
data = {
    'start_index': 0,
    'end_index': 1000,
    'my_model_name': 'all_age_model',
}
res = requests.post(url, json=data)

# Decode the response bytes and parse the JSON result.
data_list = json.loads(str(res.content, encoding='utf-8'))
print(data_list)

{'transaction_result': 'success'}


## データベースからモデルの情報を取得する

version_id_1 に -1 を指定するとすべてのモデルデータが取得できる

In [11]:
# Request the stored model records; -1 is the value used to ask for all models.
url = f"{host_name}/get_model"
data = {'version_id_1': -1}
res = requests.post(url, json=data)

# Decode the response bytes, parse the JSON, and tabulate its first element.
data_list = json.loads(str(res.content, encoding='utf-8'))
df = pd.DataFrame(data_list[0])
df

Unnamed: 0,pclass_coef,model_version_id,age_coef,training_iteration,my_model_name,sex_coef,training_date,fare_coef
0,-1.37796,1,-0.059821,34,young_age_model,2.07947,2024-05-19T18:54:08,-0.007757
1,-1.22101,2,-0.035792,42,all_age_model,2.4071,2024-05-19T18:55:00,0.000817


## モデルを評価する

version_id_1, version_id_2 -> ２つのidを指定して、modelの比較ができる。idの指定は１つでも可能。

start(end)_index -> 指定した範囲のデータに対しての推論結果などを表示する

In [13]:
data = {'version_id_1' : 1,'version_id_2' : 2,'start_index':0, 'end_index':1000}
url = host_name + "/evaluate_model"
res = requests.post(url, json=data)