In [1]:
# Enable IPython's autoreload extension. Mode 2 reloads imported modules
# before each cell is executed, so edits to the local `src` package are picked
# up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [65]:
import os
import time

import numpy as np
import pandas as pd
import cvxpy as cp

from src.format_winston_data import WinstonDataFormatter
from src.format_winston_data import generate_and_save_unsupervised_data

from src.setup_problem import Setup

# データの準備

In [11]:
# Prepare the "winston_full" input set: format the source data, add the
# unsupervised data, then reload one generated file as a sanity check.
source_dir_path = "./data"
save_dir_path = "./inputs/winston_full"

# No sample_num_per_animal is passed here, so the formatter's default applies
# (presumably "keep everything" — TODO confirm in src.format_winston_data).
data_formatter = WinstonDataFormatter(source_dir_path)
data_formatter.format_and_save_data(save_dir_path)

# Written into the same directory as the formatted supervised files.
generate_and_save_unsupervised_data(
    save_dir_path,
    data_num=20,
    data_dim=3,
)

# Read one of the saved supervised files back and show its head and shape.
df_test = pd.read_csv(
    os.path.join(save_dir_path, "L_albatross(x).csv"),
    index_col=0,
)
display(df_test.head())
print(df_test.shape)

Done!
Done!


Unnamed: 0,R,G,B,label
0,0.48664,0.440085,0.342231,-1.0
1,0.427333,0.406967,0.37666,-1.0
2,0.509321,0.512104,0.39858,-1.0
3,0.581645,0.572609,0.54955,-1.0
4,0.505475,0.517679,0.42228,-1.0


(6500, 4)


In [16]:
# Prepare the "winston_10" input set: same pipeline as the full set, but with
# sample_num_per_animal=2 passed to the formatter.
source_dir_path = "./data"
save_dir_path = "./inputs/winston_10"

data_formatter = WinstonDataFormatter(source_dir_path)
data_formatter.format_and_save_data(
    save_dir_path,
    sample_num_per_animal=2,
)

# Written into the same directory as the formatted supervised files.
generate_and_save_unsupervised_data(
    save_dir_path,
    data_num=20,
    data_dim=3,
)

# Read one of the saved supervised files back and show its head and shape.
df_test = pd.read_csv(
    os.path.join(save_dir_path, "L_albatross(x).csv"),
    index_col=0,
)
display(df_test.head())
print(df_test.shape)

Done!
Done!


Unnamed: 0,R,G,B,label
6187,0.459693,0.491589,0.324279,-1.0
5243,0.447529,0.434663,0.30718,-1.0
232,0.515541,0.511313,0.49294,-1.0
507,0.541042,0.428629,0.278256,-1.0
1653,0.489753,0.476355,0.410386,-1.0


(10, 4)


In [17]:
# Prepare the "winston_100" input set: same pipeline as the full set, but with
# sample_num_per_animal=20 passed to the formatter.
source_dir_path = "./data"
save_dir_path = "./inputs/winston_100"

data_formatter = WinstonDataFormatter(source_dir_path)
data_formatter.format_and_save_data(
    save_dir_path,
    sample_num_per_animal=20,
)

# Written into the same directory as the formatted supervised files.
generate_and_save_unsupervised_data(
    save_dir_path,
    data_num=20,
    data_dim=3,
)

# Read one of the saved supervised files back and show its head and shape.
df_test = pd.read_csv(
    os.path.join(save_dir_path, "L_albatross(x).csv"),
    index_col=0,
)
display(df_test.head())
print(df_test.shape)

Done!
Done!


Unnamed: 0,R,G,B,label
5840,0.529877,0.468574,0.448287,-1.0
5283,0.4913,0.460999,0.391749,-1.0
5750,0.5,0.5,0.5,-1.0
5758,0.562385,0.490812,0.424986,-1.0
5702,0.47789,0.4777,0.443831,-1.0


(100, 4)


# 学習

## 各 p に対して，教師データの数が 10 個

In [68]:
# Train on the data set with 10 supervised samples per predicate
# (./inputs/winston_10) and report how long the solve takes.
data_dir_path = './inputs/winston_10'

# os.listdir() returns entries in arbitrary order; sort them so the file lists
# handed to Setup are identical on every run and every machine.
file_list = sorted(os.listdir(data_dir_path))

# Setup receives file names without the ".csv" extension. splitext() strips
# only the trailing extension, whereas split('.csv')[0] would truncate a name
# that contains ".csv" somewhere before the end.
# Files starting with "L" are passed as supervised data, "U" as unsupervised.
L_files = [os.path.splitext(filename)[0] for filename in file_list
           if filename.startswith('L') and filename.endswith('.csv')]

U_files = [os.path.splitext(filename)[0] for filename in file_list
           if filename.startswith('U') and filename.endswith('.csv')]

file_names_dict = {
    'supervised': L_files,
    'unsupervised': U_files,
    'rule': ['rules_2']
}

# Build the cvxpy objective and constraints from the data and rule files.
# NOTE(review): c1 / c2 are presumably weighting coefficients of the
# objective — confirm against src.setup_problem.Setup.main.
problem_instance = Setup(data_dir_path, file_names_dict)
objective, constraints = problem_instance.main(c1=10, c2=10)

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# durations; time.time() follows the wall clock and can jump if it is adjusted.
start_time = time.perf_counter()
problem = cp.Problem(objective, constraints)
result = problem.solve(verbose=True)
end_time = time.perf_counter()
print()
print(f'学習時間: {end_time - start_time} 秒')

Loading data ...
Done in 0.04456043243408203 seconds! 

Loading rules ...
Done in 0.00018024444580078125 seconds! 

Identifying predicates ...
Done in 0.001226663589477539 seconds! 

Constructing objective function ...
Done in 0.03122711181640625 seconds! 

Constructing constraints ...
Done in 0.6905784606933594 seconds! 

All done
                                     CVXPY                                     
                                     v1.3.2                                    
(CVXPY) Oct 28 02:51:26 PM: Your problem has 498 variables, 3450 constraints, and 0 parameters.
(CVXPY) Oct 28 02:51:26 PM: It is compliant with the following grammars: DCP, DQCP
(CVXPY) Oct 28 02:51:26 PM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)
(CVXPY) Oct 28 02:51:26 PM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.
------------------------------------------------------------------

In [37]:
# Disabled debug output: prints every formula in problem_instance.KB_origin,
# then every formula in problem_instance.KB (presumably the processed form of
# the same knowledge base — TODO confirm against src.setup_problem).
# for formula in problem_instance.KB_origin:
#     print(formula)

# print()

# for new_formula in problem_instance.KB:
#     print(new_formula)

## 各 p に対して，教師データの数が 100 個

In [67]:
# Train on the data set with 100 supervised samples per predicate
# (./inputs/winston_100) and report how long the solve takes.
data_dir_path = './inputs/winston_100'

# os.listdir() returns entries in arbitrary order; sort them so the file lists
# handed to Setup are identical on every run and every machine.
file_list = sorted(os.listdir(data_dir_path))

# Setup receives file names without the ".csv" extension. splitext() strips
# only the trailing extension, whereas split('.csv')[0] would truncate a name
# that contains ".csv" somewhere before the end.
# Files starting with "L" are passed as supervised data, "U" as unsupervised.
L_files = [os.path.splitext(filename)[0] for filename in file_list
           if filename.startswith('L') and filename.endswith('.csv')]

U_files = [os.path.splitext(filename)[0] for filename in file_list
           if filename.startswith('U') and filename.endswith('.csv')]

file_names_dict = {
    'supervised': L_files,
    'unsupervised': U_files,
    'rule': ['rules_2']
}

# Build the cvxpy objective and constraints from the data and rule files.
# NOTE(review): c1 / c2 are presumably weighting coefficients of the
# objective — confirm against src.setup_problem.Setup.main.
problem_instance = Setup(data_dir_path, file_names_dict)
objective, constraints = problem_instance.main(c1=10, c2=10)

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# durations; time.time() follows the wall clock and can jump if it is adjusted.
start_time = time.perf_counter()
problem = cp.Problem(objective, constraints)
result = problem.solve(verbose=True)
end_time = time.perf_counter()
print()
print(f'学習時間: {end_time - start_time} 秒')

Loading data ...
Done in 0.055336952209472656 seconds! 

Loading rules ...
Done in 0.00020456314086914062 seconds! 

Identifying predicates ...
Done in 0.001505136489868164 seconds! 

Constructing objective function ...
Done in 0.36862802505493164 seconds! 

Constructing constraints ...
Done in 4.218665599822998 seconds! 

All done




                                     CVXPY                                     
                                     v1.3.2                                    
(CVXPY) Oct 28 02:51:02 PM: Your problem has 3288 variables, 11820 constraints, and 0 parameters.
(CVXPY) Oct 28 02:51:03 PM: It is compliant with the following grammars: DCP, DQCP
(CVXPY) Oct 28 02:51:03 PM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)
(CVXPY) Oct 28 02:51:03 PM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.
-------------------------------------------------------------------------------
                                  Compilation                                  
-------------------------------------------------------------------------------
(CVXPY) Oct 28 02:51:04 PM: Compiling problem (target solver=ECOS).
(CVXPY) Oct 28 02:51:04 PM: Reduction chain: Dcp2Cone -> CvxAttr2Constr -> ConeMatrixStuff

In [None]:
# Test data: collect the supervised ("L*.csv") file names, without the ".csv"
# extension, from the full data set directory.
test_data_dir_path = './inputs/winston_full'
test_file_list = os.listdir(test_data_dir_path)
# Iterate over test_file_list, the listing of the test directory — not
# file_list, which holds the training directory listing from an earlier cell
# and is undefined on a fresh kernel.
test_L_files = [filename.split('.csv')[0] for filename in test_file_list
                if filename.startswith('L') and filename.endswith('.csv')]