In [1]:
#元々インポートしてあったモジュール
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [2]:
#新たに作成したモジュールなどをimport
import sys
import json
import requests
import os
sys.path.append("../src")
from utils import dataframe_to_dict

host_name = "http://localhost:8081"

In [3]:
# URL to the Titanic dataset
url = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"
# Read data from the URL
data = pd.read_csv(url)

In [4]:
data[0:2]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C


In [5]:
#データフレームをdict形式に変換する
success_result_dict = dataframe_to_dict(data[0:2])
print('成功例')
print(success_result_dict)

#['Survived', 'Pclass', 'Sex', 'Age', 'Fare']の項目が揃っていないデータはエラーを返す
false_result_dict = dataframe_to_dict(data[0:2]['Name'])
print('失敗例')
print(false_result_dict)

print('取得したデータのtype')
print(type(success_result_dict[0]))

成功例
[{'Survived': 0, 'Pclass': 3, 'Sex': 0, 'Age': 22.0, 'Fare': 7.25}, {'Survived': 1, 'Pclass': 1, 'Sex': 1, 'Age': 38.0, 'Fare': 71.2833}]
"None of [Index(['Survived', 'Pclass', 'Sex', 'Age', 'Fare'], dtype='object')] are in the [index]"
失敗例
正しいデータ形式ではありません。
取得したデータのtype
<class 'dict'>


In [6]:
#データを辞書型のリストに変換する
result_dict = dataframe_to_dict(data)

print(len(result_dict))

714


In [11]:
result_dict

[{'Survived': 0, 'Pclass': 3, 'Sex': 0, 'Age': 22.0, 'Fare': 7.25},
 {'Survived': 1, 'Pclass': 1, 'Sex': 1, 'Age': 38.0, 'Fare': 71.2833},
 {'Survived': 1, 'Pclass': 3, 'Sex': 1, 'Age': 26.0, 'Fare': 7.925},
 {'Survived': 1, 'Pclass': 1, 'Sex': 1, 'Age': 35.0, 'Fare': 53.1},
 {'Survived': 0, 'Pclass': 3, 'Sex': 0, 'Age': 35.0, 'Fare': 8.05},
 {'Survived': 0, 'Pclass': 1, 'Sex': 0, 'Age': 54.0, 'Fare': 51.8625},
 {'Survived': 0, 'Pclass': 3, 'Sex': 0, 'Age': 2.0, 'Fare': 21.075},
 {'Survived': 1, 'Pclass': 3, 'Sex': 1, 'Age': 27.0, 'Fare': 11.1333},
 {'Survived': 1, 'Pclass': 2, 'Sex': 1, 'Age': 14.0, 'Fare': 30.0708},
 {'Survived': 1, 'Pclass': 3, 'Sex': 1, 'Age': 4.0, 'Fare': 16.7},
 {'Survived': 1, 'Pclass': 1, 'Sex': 1, 'Age': 58.0, 'Fare': 26.55},
 {'Survived': 0, 'Pclass': 3, 'Sex': 0, 'Age': 20.0, 'Fare': 8.05},
 {'Survived': 0, 'Pclass': 3, 'Sex': 0, 'Age': 39.0, 'Fare': 31.275},
 {'Survived': 0, 'Pclass': 3, 'Sex': 1, 'Age': 14.0, 'Fare': 7.8542},
 {'Survived': 1, 'Pclass': 2, 

In [14]:
#データをデータベースに登録する
url = host_name + "/upload_data"
res = requests.post(url, json=result_dict)

print(res.content)

b'{"transaction_result":"success"}'


In [15]:
#DBからデータを取得する
url = host_name + "/get_data"
data_index={'start_index':1,'end_index':5}#取得するindexの範囲を指定する
res = requests.post(url, json=data_index)

#resはバイナリデータで返される, decodeで文字列に変換
#その後、{}で囲まれた部分を、json.loads()で辞書型データに変換
data_list = json.loads(res.content.decode('utf-8'))
data_list

[{'survived': 0,
  'sex': 0,
  'data_id': 1,
  'fare': 7.75,
  'upload_date': '2024-05-12',
  'pclass': 3,
  'age': 32},
 {'survived': 0,
  'sex': 0,
  'data_id': 2,
  'fare': 7.75,
  'upload_date': '2024-05-12',
  'pclass': 3,
  'age': 32},
 {'survived': 0,
  'sex': 0,
  'data_id': 3,
  'fare': 7.75,
  'upload_date': '2024-05-12',
  'pclass': 3,
  'age': 32},
 {'survived': 0,
  'sex': 0,
  'data_id': 4,
  'fare': 7.25,
  'upload_date': '2024-05-12',
  'pclass': 3,
  'age': 22},
 {'survived': 1,
  'sex': 1,
  'data_id': 5,
  'fare': 71.2833,
  'upload_date': '2024-05-12',
  'pclass': 1,
  'age': 38}]

In [25]:
#新しいモデルを作成する
data_index={'start_index':0,'end_index':10000}#トレーニングに使うindexの範囲を指定する
url = host_name + "/train_new_model"
res = requests.post(url, json=data_index)

data_list = json.loads(res.content.decode('utf-8'))
print(data_list)

{'transaction_result': 'success', 'accuracy': 0.7986111111111112}


In [15]:
# Print the accuracy
print('Accuracy:', accuracy_score(y_test, predictions))

# Predict survival for a new passenger
new_passenger = pd.DataFrame({'Pclass': [], 'Sex': [1], 'Age': [22.0], 'Fare': [7.25]})
survival_prediction = model.predict(new_passenger)
print('Survival prediction for the new passenger:', survival_prediction)

Accuracy: 0.7552447552447552
Survival prediction for the new passenger: [1]


In [16]:
model.decision_function(new_passenger)

array([0.48691626])

In [7]:
model.__dict__

{'penalty': 'l2',
 'dual': False,
 'tol': 0.0001,
 'C': 1.0,
 'fit_intercept': True,
 'intercept_scaling': 1,
 'class_weight': None,
 'random_state': None,
 'solver': 'lbfgs',
 'max_iter': 100,
 'multi_class': 'auto',
 'verbose': 0,
 'warm_start': False,
 'n_jobs': None,
 'l1_ratio': None,
 'feature_names_in_': array(['Pclass', 'Sex', 'Age', 'Fare'], dtype=object),
 'n_features_in_': 4,
 'classes_': array([0, 1]),
 'n_iter_': array([44], dtype=int32),
 'coef_': array([[-1.24083747e+00,  2.53036817e+00, -4.24559240e-02,
          2.27255198e-04]]),
 'intercept_': array([2.61144324])}

In [8]:
model.coef_

array([[-1.24083747e+00,  2.53036817e+00, -4.24559240e-02,
         2.27255198e-04]])