In [1]:
import random
import numpy as np
import pandas as pd
import pickle
import sklearn



In [2]:
print("scikit-learn version:", sklearn.__version__) #버젼을 맞춰줘야합니다 1.2.2
print("numpy version:", np.__version__) #버젼을 맞춰줘야합니다 1.24.3

scikit-learn version: 1.2.2
numpy version: 1.24.3


In [3]:
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

In [4]:
"""
chromosome_length:
염색체의 길이를 나타낸다.
population_size:
유전 알고리즘에서 사용하는 인구 크기를 나타낸다. 이 크기만큼의 염색체들이 각 세대마다 존재하게 된다.

mutation_rate:
변이 확률을 나타낸다. 이 확률에 따라 각 염색체에 대해 변이가 발생할지를 결정한다.

crossover_rate:
교차 확률을 나타낸다. 이 확률에 따라 선택된 부모 염색체들 사이에서 교차 연산이 일어날지를 결정한다. 

max_generations:
유전 알고리즘이 실행되는 최대 세대 수를 나타낸다. 이 값을 초과하면 알고리즘이 종료된다.

df
최적화 대상이 되는 공연들의 feature들을 포함한 데이터프레임(좌석 점유율은 포함하지 않는다).
인덱스를 초기화 한 후 입력해야 한다.

option_date
조정가능한 날짜들의 데이터프레임.

model
좌석 점유율을 예측할 수 있는 모델.

entropy
적합도 식에 entropy 제약식을 추가할 것인지 결정한다.
"""

class GeneticAlgorithm:
    def __init__(self, population_size, mutation_rate, crossover_rate, max_generations, df, option_date, model, entropy = False):
        self.chromosome_length = option_date.shape[0]
        self.population_size = population_size
        self.mutation_rate = mutation_rate
        self.crossover_rate = crossover_rate
        self.max_generations = max_generations
        self.population = self.initialize_population()
        self.df = df
        self.option_date = option_date
        self.model = model
        self.entropy = entropy
        assert self.df.shape[0] <= option_date.shape[0]
        
    def _entropy(self,pred):
        base = len(pred)
        pk = pred/np.sum(pred)
        return -np.sum(pk * np.log(pk)) / np.log(base)
    
    def initialize_population(self):
        #chromosome의 숫자는 df의 인덱스의 숫자이다.
        return [self.create_chromosome() for _ in range(self.population_size)]

    def create_chromosome(self):
        return random.sample(range(self.chromosome_length), self.chromosome_length)

    def fitness_function(self, chromosome):
        #염색체에서 option date의 행의 개수 이상의 숫자는 더미 공연을 의미한다.
        chromosome = np.array(chromosome)
        sub_df = self.df.drop(self.option_date.columns, axis = 1)
        adj_df = pd.concat([self.option_date[chromosome<self.df.shape[0]].reset_index(drop=True),
                            sub_df.loc[chromosome[chromosome<self.df.shape[0]],:].reset_index(drop=True)], axis = 1)
        adj_df = adj_df[self.df.columns]
        pred = self.model.predict(adj_df)
        modified_pred = np.where(pred > 1, 1, pred)
        if self.entropy:
            return self._entropy(modified_pred)*np.sum(modified_pred)
        else:
            return np.sum(modified_pred)

    def select_parents(self):
        # Roulette wheel selection
        fitness_values = [self.fitness_function(chromosome) for chromosome in self.population]
        total_fitness = sum(fitness_values)
        pick = random.uniform(0, total_fitness)
        current = 0
        for chromosome, fitness in zip(self.population, fitness_values):
            current += fitness
            if current > pick:
                return chromosome

    def partially_matched_crossover(self, parent1, parent2):
        size = len(parent1)
        p1, p2 = [-1] * size, [-1] * size

        # 랜덤한 범위 내에서 부모의 일부 구간 선택
        cxpoint1 = random.randint(0, size - 1)
        cxpoint2 = random.randint(0, size - 1)
        if cxpoint2 < cxpoint1:
            cxpoint1, cxpoint2 = cxpoint2, cxpoint1

        # 선택된 구간을 자식에 복사
        for i in range(cxpoint1, cxpoint2 + 1):
            p1[i] = parent2[i]
            p2[i] = parent1[i]

        # 아직 복사하지 않은 부분을 찾아서 자식에 추가
        for i in range(size):
            if parent1[i] not in p1:
                for j in range(size):
                    if p1[j] == -1:
                        p1[j] = parent1[i]
                        break
            if parent2[i] not in p2:
                for j in range(size):
                    if p2[j] == -1:
                        p2[j] = parent2[i]
                        break

        return p1, p2

    def mutate(self, chromosome):
        # Swap mutation
        if random.random() < self.mutation_rate:
            index1, index2 = random.sample(range(self.chromosome_length), 2)
            chromosome[index1], chromosome[index2] = chromosome[index2], chromosome[index1]
        return chromosome

    def run(self):
        for generation in range(self.max_generations):
            new_population = []

            while len(new_population) < self.population_size:
                parent1 = self.select_parents()
                parent2 = self.select_parents()

                if random.random() < self.crossover_rate:
                    child1, child2 = self.partially_matched_crossover(parent1, parent2)
                else:
                    child1, child2 = parent1[:], parent2[:]

                child1 = self.mutate(child1)
                child2 = self.mutate(child2)
                
                new_population.extend([child1, child2])

            self.population = new_population[:self.population_size]

            # 현재 세대의 가장 우수한 염색체의 적합도를 출력.
            best_fitness = max([self.fitness_function(chromo) for chromo in self.population])
            print(f"Generation {generation}: Best Fitness = {best_fitness}")

        return max(self.population, key=self.fitness_function)
    

In [5]:
# load the model from disk
model = pickle.load(open('../model/gbm_fin_model_dae.sav', 'rb'))
model
print(model)

GradientBoostingRegressor(alpha=0.7, criterion='squared_error',
                          learning_rate=0.15, loss='huber', n_estimators=150,
                          random_state=40)


In [6]:
df = pd.read_csv(r'../preprocessed_data/대구콘서트하우스/모델학습및테스트데이터_최종.csv', index_col = 0)
df.reset_index(drop = True, inplace = True)
display(df)

Unnamed: 0,소요시간,관람연령,아동공연 여부,축제 여부,내한공연 여부,단독판매여부,공연시작년도,공연시작월,공연시작일,공연시작시분,공휴일여부,좌석등급개수,평균티켓가격,코로나표준점수,검색량,출연횟수,dayofweek_fri,dayofweek_mon,dayofweek_sat,dayofweek_sun,dayofweek_thu,dayofweek_tue,dayofweek_wed,대중무용_스트리트/스포츠댄스,대중음악_기타,대중음악_재즈/월드뮤직,뮤지컬_악극,복합_다원/융복합,복합_복합,서양음악(클래식)_기악,서양음악(클래식)_성악,서양음악(클래식)_오페라,한국음악(국악)_기악,한국음악(국악)_성악,한국음악(국악)_혼합,21세기현대음악연구회,AM예술기획,CM심포니오케스트라,CM코리아,EIM음악의모든것,EnoshEnsemble,FullMoonCompany,ILCLASSICO,KAG기획,OBS앙상블,Unknown,WorldCultureNetworks,YJ기획,가델클래식예술기획,경희설비올라클래스,계명대학교음악공연예술대학,나래아트컴퍼니,노바솔로이스츠,노부스앙상블,뉴아트예술기획,다매체예술단ARS,대구광역시,대구교육대학교,대구국제현대음악제,대구문화재단,대구솔리스트플루트앙상블,대구스트링스심포니오케스트라,대구시립예술단,대구시립합창단,대구음악협회,대구콘서트하우스,대구트롬본앙상블,독일가곡회,라모아트컴퍼니,락아츠컴퍼니,르송앙상블,리에목관5중주,모멘토앙상블,모음앙상블,문화체육관광부,버드Bud.Ltd,보아즈앙상블,보엠아트,부디앙상블,브라더스,솔리스츠K,솔리스트첼로앙상블경상,아츠컴퍼니판,아트라스,아트메이트,아티스트트리오,앙상블아스트로스,앙상블유터피,앙상블토니카,어니스트필하모닉오케스트라,에이엠예술기획,에이치엠에스컴퍼니,엔에이치엔티켓링크,엠케이예술,우드드로잉,월드오케스트라시리즈조직위원회,위즈뮤직컴퍼니,이음기획,젊은음악인들의모임,젊은음악인의모임,창조문화포럼,천마피아노연구회,첼리스텐,카프리스뮤직,케이에이엔엔터테인먼트,코리안혼오케스트라,콰르텟콘아니마,킴스클래식예술기획,트리오두,프란츠스튜디오,프란츠클래식,프로아트엔터테인먼트,한국가곡회,한국명곡진흥협회,한국문화예술회관연합회,좌석점유율
0,100,7,0,0,0,0,2022,7,22,1170,0,1,30000.0,0,2820,10,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.475806
1,60,7,0,1,0,0,2020,8,19,960,0,1,20000.0,1,1670,31,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.959184
2,90,7,0,0,0,1,2022,5,20,1170,0,1,10000.0,0,0,3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.745968
3,90,7,0,0,0,0,2022,4,16,1140,0,1,0.0,1,0,2,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.455645
4,60,7,0,0,0,1,2022,4,22,1170,0,1,10000.0,0,0,3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.576613
5,70,7,0,0,0,1,2019,11,6,1170,0,1,10000.0,0,0,6,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.423387
6,80,7,0,0,0,0,2021,5,5,1020,1,1,10000.0,2,0,1,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.965812
7,90,7,0,0,0,1,2021,4,28,1170,0,1,5000.0,2,0,7,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0.91453
8,60,7,0,1,0,1,2022,11,12,840,0,1,10000.0,0,0,1,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.880342
9,90,7,0,0,0,0,2022,4,14,1170,0,1,0.0,1,0,5,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.786325


In [5]:
sub_df = df[np.logical_and(df['공연시작년도'] == 2022, df['공연시작월'].between(5,7))]
sub_df_date = sub_df[['공연시작년도','공연시작월','공연시작일' ,'공연시작시분','dayofweek_fri', 'dayofweek_mon', 'dayofweek_sat',
       'dayofweek_sun', 'dayofweek_thu', 'dayofweek_tue', 'dayofweek_wed','공휴일여부']]
sub_df.drop(['좌석점유율'], axis = 1, inplace = True)
sub_df.reset_index(drop = True, inplace = True)

display(sub_df)
display(sub_df_date)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sub_df.drop(['좌석점유율'], axis = 1, inplace = True)


Unnamed: 0,소요시간,관람연령,아동공연 여부,축제 여부,내한공연 여부,단독판매여부,공연시작년도,공연시작월,공연시작일,공연시작시분,...,코리안혼오케스트라,콰르텟콘아니마,킴스클래식예술기획,트리오두,프란츠스튜디오,프란츠클래식,프로아트엔터테인먼트,한국가곡회,한국명곡진흥협회,한국문화예술회관연합회
0,100,7,0,0,0,0,2022,7,22,1170,...,0,0,0,0,0,0,0,0,0,0
1,90,7,0,0,0,1,2022,5,20,1170,...,0,0,0,0,0,0,0,0,0,0
2,80,5,0,0,0,0,2022,5,13,1170,...,0,0,0,0,0,0,0,0,0,0
3,90,7,0,0,0,1,2022,6,14,1170,...,0,0,0,0,0,0,0,0,0,0
4,80,7,0,0,0,0,2022,5,11,1170,...,0,0,0,0,0,0,0,0,0,0
5,90,7,0,0,0,0,2022,6,10,1170,...,0,0,0,0,0,0,0,0,0,0
6,70,7,0,0,0,1,2022,6,23,1170,...,0,0,0,0,0,0,0,0,0,0
7,90,7,0,0,0,1,2022,5,5,1170,...,0,0,0,0,0,0,0,0,0,0
8,90,7,0,0,0,1,2022,6,5,1020,...,0,0,0,0,0,0,0,0,0,0
9,90,7,0,0,0,1,2022,5,31,1170,...,0,0,0,0,0,0,0,0,0,0


Unnamed: 0,공연시작년도,공연시작월,공연시작일,공연시작시분,dayofweek_fri,dayofweek_mon,dayofweek_sat,dayofweek_sun,dayofweek_thu,dayofweek_tue,dayofweek_wed,공휴일여부
0,2022,7,22,1170,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0
2,2022,5,20,1170,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0
11,2022,5,13,1170,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0
17,2022,6,14,1170,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0
18,2022,5,11,1170,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0
20,2022,6,10,1170,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0
23,2022,6,23,1170,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0
27,2022,5,5,1170,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1
28,2022,6,5,1020,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0
34,2022,5,31,1170,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0


In [8]:
input_arr=[[2022,5,3,1170,0,0,0,0,0,1,0,0],
[2022,5,4,1170,0,0,0,0,0,0,1,0],
[2022,5,6,1170,1,0,0,0,0,0,0,0],
[2022,5,7,1020,0,0,1,0,0,0,0,0],
[2022,5,8,1020,0,0,0,1,0,0,0,1],
[2022,5,10,1170,0,0,0,0,0,1,0,0],
[2022,5,12,1170,0,0,0,0,1,0,0,0],
[2022,5,14,1020,0,0,1,0,0,0,0,0],
[2022,5,15,1020,0,0,0,1,0,0,0,0],
[2022,5,17,1170,0,0,0,0,0,1,0,0],
[2022,5,21,1020,0,0,1,0,0,0,0,0],
[2022,5,22,1020,0,0,0,1,0,0,0,0],
[2022,5,24,1170,0,0,0,0,0,1,0,0],
[2022,5,26,1170,0,0,0,0,1,0,0,0],
[2022,5,27,1170,1,0,0,0,0,0,0,0],
[2022,5,28,1020,0,0,1,0,0,0,0,0],
[2022,5,29,1020,0,0,0,1,0,0,0,0],

[2022,6,1,1170,0,0,0,0,0,0,1,1],
[2022,6,2,1170,0,0,0,0,1,0,0,0],
[2022,6,3,1170,1,0,0,0,0,0,0,0],
[2022,6,4,1020,0,0,1,0,0,0,0,0],
[2022,6,7,1170,0,0,0,0,0,1,0,0],
[2022,6,8,1170,0,0,0,0,0,0,1,0],
[2022,6,9,1170,0,0,0,0,1,0,0,0],
[2022,6,15,1170,0,0,0,0,0,0,1,0],
[2022,6,16,1170,0,0,0,0,1,0,0,0],
[2022,6,17,1170,1,0,0,0,0,0,0,0],
[2022,6,18,1020,0,0,1,0,0,0,0,0],
[2022,6,19,1020,0,0,0,1,0,0,0,0],
[2022,6,21,1170,0,0,0,0,0,1,0,0],
[2022,6,22,1170,0,0,0,0,0,0,1,0],
[2022,6,24,1170,1,0,0,0,0,0,0,0],
[2022,6,26,1020,0,0,0,1,0,0,0,0],
[2022,6,29,1170,0,0,0,0,0,0,1,0],
[2022,6,30,1170,0,0,0,0,1,0,0,0],

[2022,7,3,1020,0,0,0,1,0,0,0,0],
[2022,7,5,1170,0,0,0,0,0,1,0,0],
[2022,7,6,1170,0,0,0,0,0,0,1,0],
[2022,7,7,1170,0,0,0,0,1,0,0,0],
[2022,7,8,1170,1,0,0,0,0,0,0,0],
[2022,7,9,1020,0,0,1,0,0,0,0,0],
[2022,7,10,1020,0,0,0,1,0,0,0,0],
[2022,7,12,1170,0,0,0,0,0,1,0,0],
[2022,7,13,1170,0,0,0,0,0,0,1,0],
[2022,7,15,1170,1,0,0,0,0,0,0,0],
[2022,7,16,1020,0,0,1,0,0,0,0,0],
[2022,7,19,1170,0,0,0,0,0,1,0,0],
[2022,7,23,1020,0,0,1,0,0,0,0,0],
[2022,7,24,1020,0,0,0,1,0,0,0,0],
[2022,7,26,1170,0,0,0,0,0,1,0,0],
[2022,7,28,1170,0,0,0,0,1,0,0,0],
[2022,7,29,1170,1,0,0,0,0,0,0,0],
[2022,7,30,1020,0,0,1,0,0,0,0,0],
[2022,7,31,1020,0,0,0,1,0,0,0,0]]

In [9]:
add_df = pd.DataFrame(input_arr,columns = sub_df_date.columns)
sub_df_date = pd.concat([sub_df_date, add_df], axis = 0)
sub_df_date.sort_values(by = ['공연시작월','공연시작일'], inplace = True)
sub_df_date.reset_index(drop = True, inplace = True)
display(sub_df_date)

Unnamed: 0,공연시작년도,공연시작월,공연시작일,공연시작시분,dayofweek_fri,dayofweek_mon,dayofweek_sat,dayofweek_sun,dayofweek_thu,dayofweek_tue,dayofweek_wed,공휴일여부
0,2022,5,1,1020,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0
1,2022,5,3,1170,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0
2,2022,5,4,1170,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0
3,2022,5,5,1170,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1
4,2022,5,6,1170,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0
5,2022,5,7,1020,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0
6,2022,5,8,1020,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1
7,2022,5,10,1170,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0
8,2022,5,11,1170,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0
9,2022,5,12,1170,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0


In [10]:
sub_df.reset_index(drop = True, inplace = True)

In [14]:
genetic = GeneticAlgorithm(population_size = 100, mutation_rate = 0.01, crossover_rate = 0.8, max_generations = 50, df = sub_df, 
                          option_date = sub_df_date, model = model, entropy = False)
genetic.run()

Generation 0: Best Fitness = 21.543285084088073
Generation 1: Best Fitness = 21.543285084088073
Generation 2: Best Fitness = 21.408002511802422
Generation 3: Best Fitness = 21.395533088261747
Generation 4: Best Fitness = 21.471849750251355
Generation 5: Best Fitness = 21.602408759723655
Generation 6: Best Fitness = 21.65639095728618
Generation 7: Best Fitness = 21.491923837250475
Generation 8: Best Fitness = 21.57861131927571
Generation 9: Best Fitness = 21.57861131927571
Generation 10: Best Fitness = 21.589704048845718
Generation 11: Best Fitness = 21.589704048845718
Generation 12: Best Fitness = 21.520551682929295
Generation 13: Best Fitness = 21.537613799432425
Generation 14: Best Fitness = 21.537520918814526
Generation 15: Best Fitness = 21.50817434024816
Generation 16: Best Fitness = 21.488116652302715
Generation 17: Best Fitness = 21.488822340023287
Generation 18: Best Fitness = 21.430319556364104
Generation 19: Best Fitness = 21.430319556364104
Generation 20: Best Fitness = 21.4

[65,
 52,
 15,
 74,
 66,
 55,
 0,
 10,
 33,
 77,
 73,
 19,
 43,
 40,
 38,
 13,
 46,
 57,
 6,
 27,
 28,
 7,
 53,
 22,
 59,
 42,
 58,
 47,
 56,
 75,
 78,
 8,
 9,
 50,
 20,
 26,
 25,
 14,
 54,
 29,
 35,
 60,
 62,
 1,
 69,
 24,
 63,
 12,
 45,
 16,
 23,
 2,
 67,
 71,
 72,
 61,
 51,
 31,
 64,
 4,
 32,
 76,
 17,
 41,
 49,
 34,
 68,
 48,
 39,
 30,
 36,
 11,
 3,
 44,
 70,
 5,
 21,
 37,
 18]

In [15]:
df[np.logical_and(df['공연시작년도'] == 2022, df['공연시작월'].between(5,7))].drop([74,101,229])['좌석점유율'].sum()

19.673387096774196

In [23]:
best = np.array([65, 52, 15, 74, 66, 55, 0, 10, 33, 77, 73, 19, 43, 40, 38, 13, 46, 57, 6, 27, 28, 7, 53, 22, 59, 42, 58, 47, 56, 
                 75, 78, 8, 9, 50, 20, 26, 25, 14, 54, 29, 35, 60, 62, 1, 69, 24, 63, 12, 45, 16, 23, 2, 67, 71, 72, 61, 51, 31, 
                 64, 4, 32, 76, 17, 41, 49, 34, 68, 48, 39, 30, 36, 11, 3, 44, 70, 5, 21, 37, 18])
s_df = sub_df.drop(sub_df_date.columns, axis = 1)
adj_df = pd.concat([sub_df_date[best<sub_df.shape[0]].reset_index(drop=True),
                    s_df.loc[best[best<sub_df.shape[0]],:].reset_index(drop=True)], axis=1)
adj_df = adj_df[sub_df.columns]
pred = model.predict(adj_df)
modified_pred = np.where(pred>1,1,pred)
print(modified_pred)
print(np.sum(modified_pred))


[0.88975614 0.58523228 0.94182349 0.67377487 0.89879389 0.94517859
 0.7375102  0.98713133 0.85695821 0.88668345 0.85730835 1.
 0.90592819 0.84796052 0.7059782  0.94496148 0.93190457 0.99929894
 0.6817135  0.92852234 0.93004702 0.83388485 0.69462029 0.89149983
 0.99434094]
21.55081147879645
