In [431]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [432]:
def read_instancia(file_path):
    """Read a generated multi-plant instance file and return its data.

    The parser expects the following whitespace-separated layout:

    1. One line with the number of items and the number of periods.
    2. One line with the number of plants.
    3. One integer capacity per line, one line per plant.
    4. ``num_plants * items`` lines, grouped by plant and then by item, each
       holding production time, setup time, setup cost and production cost.
    5. One line with all inventory costs, grouped by plant.
    6. One demand line per period, holding the demands of every item grouped
       by plant.
    7. The remaining non-blank lines, read as rows of the transfer-cost matrix.

    Parameters
    ----------
    file_path : str or os.PathLike
        Path to the instance file.

    Returns
    -------
    dict
        ``instance`` (file name without directory and without a trailing
        ``.dat``), ``items``, ``periods``, ``num_plants``, ``capacities``
        (plants x periods; each plant's capacity repeated for every period),
        ``production_time``, ``setup_time``, ``setup_cost``,
        ``production_cost`` and ``inventory_costs`` (items x plants),
        ``demand_matrix`` (items x plants x periods) and ``transfer_costs``
        (one row per non-blank trailing line).

    Raises
    ------
    ValueError
        If a production line does not hold exactly four values, or a demand
        line holds fewer than ``num_plants * items`` values.
    """
    # Only a trailing '.dat' is removed, so names that merely contain '.dat'
    # (e.g. 'inst.data.dat') are not mangled.
    name = os.path.basename(os.fspath(file_path))
    if name.endswith('.dat'):
        name = name[:-len('.dat')]

    with open(file_path, 'r') as file:
        lines = file.readlines()

    # 1. Number of items and periods
    items, periods = map(int, lines[0].split())

    # 2. Number of plants
    num_plants = int(lines[1].strip())

    # 3. Plant capacities, repeated along the periods -> shape (j, t)
    capacities = [int(lines[2 + plant].strip()) for plant in range(num_plants)]
    capacities = np.tile(capacities, (periods, 1)).T

    # 4. Production data: one line per (plant, item) pair, plant-major, with
    #    (production time, setup time, setup cost, production cost)
    start_line = 2 + num_plants
    production_rows = []
    for offset in range(num_plants * items):
        values = [float(value) for value in lines[start_line + offset].split()]
        if len(values) != 4:
            raise ValueError(
                f"production line {start_line + offset + 1} must hold 4 values, "
                f"found {len(values)}"
            )
        production_rows.append(values)
    production = np.array(production_rows, dtype=float).reshape(num_plants, items, 4)
    # Each column becomes its own (item, plant) matrix.
    production_time, setup_time, setup_cost, production_cost = (
        np.ascontiguousarray(production[:, :, column].T) for column in range(4)
    )

    # 5. Inventory costs: a single line grouped by plant -> shape (i, j)
    inventory_costs_line = start_line + num_plants * items
    inventory_costs = np.array(
        [float(value) for value in lines[inventory_costs_line].split()]
    )
    inventory_costs = inventory_costs.reshape(num_plants, -1).T

    # 6. Demands: one line per period, items grouped by plant
    demand_start_line = inventory_costs_line + 1
    values_per_period = num_plants * items
    demand_rows = []
    for period in range(periods):
        row = [int(value) for value in lines[demand_start_line + period].split()]
        if len(row) < values_per_period:
            raise ValueError(
                f"demand line {demand_start_line + period + 1} must hold at least "
                f"{values_per_period} values, found {len(row)}"
            )
        # Values beyond the expected count are ignored.
        demand_rows.append(row[:values_per_period])
    demand = np.array(demand_rows, dtype=int).reshape(periods, num_plants, items)
    # (periods, plants, items) -> (items, plants, periods)
    final_demand_matrix = np.transpose(demand, (2, 1, 0))

    # 7. Transfer costs: every remaining non-blank line is one matrix row
    transfer_cost_line = demand_start_line + periods
    transfer_cost_matrix = [
        [float(value) for value in raw_line.split()]
        for raw_line in lines[transfer_cost_line:]
        if raw_line.strip()
    ]
    transfer_costs = np.array(transfer_cost_matrix)

    return {"instance": name,
            "items": items,
            "periods": periods,
            "num_plants": num_plants,
            "capacities": capacities,
            "production_time": production_time,
            "setup_time": setup_time,
            "setup_cost": setup_cost,
            "production_cost": production_cost,
            "inventory_costs": inventory_costs,
            "demand_matrix": final_demand_matrix,
            "transfer_costs": transfer_costs}


# Leitura e interpretação dos dados

In [433]:
# Explore the features starting from a single, simple instance
problem_data = read_instancia('../instancias/multi_plant_instances/NAA02_12_2_10.dat')

In [434]:
# Index sets, built directly as integer ranges
I = np.arange(problem_data['items'])        # products (i)
J = np.arange(problem_data['num_plants'])   # plants (j)
T = np.arange(problem_data['periods'])      # periods (t)

# Parameters, copied out of problem_data into short names
d = np.array(problem_data['demand_matrix'])     # demand (i, j, t)
cap = np.array(problem_data['capacities'])      # capacity (j, t)
b = np.array(problem_data['production_time'])   # production time (i, j)
f = np.array(problem_data['setup_time'])        # setup time (i, j)
c = np.array(problem_data['production_cost'])   # production cost (i, j)
s = np.array(problem_data['setup_cost'])        # setup cost (i, j)
r = np.array(problem_data['transfer_costs'])    # transfer cost (j, k)
h = np.array(problem_data['inventory_costs'])   # inventory cost (i, j)

In [472]:
# Name of the loaded instance (file name without the '.dat' extension)
problem_data['instance']

'NAA02_12_2_10'

In [469]:
# Feature set suggested by GPT (corrected version)

# Utilization of every (period, plant) pair: setup time plus the production time
# needed to meet the plant's own demand, divided by the plant's capacity.
# Computed once and shared by the four utilization statistics below.
utilization = [
    np.sum(f[:, j] + b[:, j] * d[:, j, t]) / cap[j, t]
    for t in T for j in J
]

instance_features = {
    # Basic instance-level features
    'num_products': len(I),
    'num_plants': len(J),
    'num_periods': len(T),

    # Demand statistics (taken over every (item, plant, period) entry of d,
    # despite the "per_product" suffix in the key names)
    'total_demand': np.sum(d),
    'avg_demand_per_product': np.mean(d),
    'variance_demand_per_product': np.var(d),
    'max_demand_per_product': np.max(d),
    'std_demand_per_product': np.std(d),

    # Capacity and utilization statistics
    'mean_utilization': np.mean(utilization),
    'max_utilization': np.max(utilization),
    'min_utilization': np.min(utilization),
    'std_utilization': np.std(utilization),

    # Cost statistics
    'avg_setup_cost': np.mean(s),
    'variance_setup_cost': np.var(s),
    'max_setup_cost': np.max(s),
    'std_setup_cost': np.std(s),
    'avg_production_cost': np.mean(c),
    'variance_production_cost': np.var(c),
    'max_production_cost': np.max(c),
    'std_production_cost': np.std(c),
    'total_transportation_cost': np.sum(r),
    'avg_inventory_cost': np.mean(h),
    'variance_inventory_cost': np.var(h),

    # Relationships between features
    'demand_to_capacity_ratio': np.sum(d) / np.sum(cap),
    'setup_to_production_cost_ratio': np.sum(s) / np.sum(c),
    'avg_demand_to_setup_cost_ratio': np.mean(d) / np.mean(s),
    'total_cost_to_demand_ratio': (np.sum(s) + np.sum(c)) / np.sum(d),
}

# Print the instance-level features, one per line
for feature, value in instance_features.items():
    print(f'{feature}: {value}')


num_products: 10
num_plants: 2
num_periods: 12
total_demand: 21946
avg_demand_per_product: 91.44166666666666
variance_demand_per_product: 2620.038263888889
max_demand_per_product: 180
std_demand_per_product: 51.18630934037821
mean_utilization: 1.0019875114870478
max_utilization: 1.384483758700696
min_utilization: 0.7451024327784892
std_utilization: 0.14744424776180076
avg_setup_cost: 415.5849999999999
variance_setup_cost: 47326.13127499999
max_setup_cost: 866.6
std_setup_cost: 217.5456992794847
avg_production_cost: 1.8850000000000002
variance_production_cost: 0.06127499999999999
max_production_cost: 2.4
std_production_cost: 0.24753787588973125
total_transportation_cost: 0.6
avg_inventory_cost: 0.325
variance_inventory_cost: 0.005875000000000001
demand_to_capacity_ratio: 0.2782765266788395
setup_to_production_cost_ratio: 220.46949602122012
avg_demand_to_setup_cost_ratio: 0.2200312009977903
total_cost_to_demand_ratio: 0.3804520185910872


# Features da instância

## Nº de plantas

In [435]:
# Number of plants: size of the plant index set J
len(J)

2

## Nº de produtos

In [436]:
# Number of products: size of the product index set I
len(I)

10

## Nº de períodos

In [437]:
# Number of periods: size of the period index set T
len(T)

12

## Nº de variáveis binárias

In [438]:
# One binary variable per (product, plant, period) combination
# NOTE(review): presumably these are the setup decisions - confirm against the model
len(I) * len(J) * len(T)

240

## Custos de produção

Devo procurar um valor representativo dos custos de produção para a instância

In [439]:
# Total cost per period: for every plant and product, the setup cost plus the
# production cost of meeting that plant's own demand in the period.
# The terms are added one by one, plant by plant and product by product.
custos = [
    sum(
        s[item, plant] + c[item, plant] * d[item, plant, period]
        for plant in J
        for item in I
    )
    for period in T
]

In [440]:
# Show the total cost computed for each period
custos

[np.float64(11638.099999999999),
 np.float64(11856.800000000001),
 np.float64(12253.700000000003),
 np.float64(11201.4),
 np.float64(12139.800000000001),
 np.float64(11542.2),
 np.float64(11370.500000000002),
 np.float64(11145.9),
 np.float64(12481.3),
 np.float64(12365.7),
 np.float64(11723.799999999997),
 np.float64(11100.1)]

In [441]:
# Average cost over all periods, divided by the number of (product, plant) pairs
np.mean(custos) / (len(I) * len(J))

np.float64(586.7470833333333)

## Tempos de produção

## Custos de setup

## Tempos de setup

## Uso de capacidade

### Capacidade com setup

In [458]:
# Utilization per plant and period: total setup time plus the production time
# needed for the plant's own demand, relative to the plant's capacity.
ocupacao = [
    np.sum(f[:, plant] + b[:, plant] * d[:, plant, period]) / cap[plant, period]
    for period in T
    for plant in J
]

In [459]:
# This one is very easy to reverse-engineer: the mean is about 1.1 for the tight
# instances and close to 1.0 for the normal-capacity ones
np.mean(ocupacao)

np.float64(1.0019875114870478)

In [460]:
# Standard deviation of the utilization values
np.std(ocupacao)

np.float64(0.14744424776180076)

In [461]:
# Adding percentiles could be a good idea if they carry any information, but
# given these generation parameters I believe they do not
np.min(ocupacao)

np.float64(0.7451024327784892)

In [446]:
# Largest utilization value
np.max(ocupacao)

np.float64(1.3844837587006962)

### Capacidade sem setup

Essa não vai mudar tanto também, mas posso ver

In [447]:
# Utilization ignoring setup: production time needed to meet each plant's own
# demand in a period, divided by that plant's capacity in the period.
# Product terms are added one by one before dividing by the capacity.
ocupacao = [
    sum(b[item, plant] * d[item, plant, period] for item in I) / cap[plant, period]
    for period in T
    for plant in J
]

In [448]:
# Mean utilization when setup times are left out
np.mean(ocupacao)

np.float64(0.8520931044415264)