# Creating randomized schedule data in a CSV file 

In [9]:
import random
import csv
import os

# --- PARAMETRY ---
days = 30
shifts = 3
employees = 24 

folder = "data_csv"
os.makedirs(folder, exist_ok=True)
filename = os.path.join(folder, f"grafik_{days}d_{shifts}s_{employees}emp.csv")

# --- GENEROWANIE CSV ---
with open(filename, "w", newline="") as csvfile:
    writer = csv.writer(csvfile)

    # --- 1. Nagłówki wymagań ---
    req_headers = []
    req_values = []

    for day in range(1, days + 1):
        for shift in range(1, shifts + 1):
            req_headers.append(f"req_{day}d_{shift}s")
            min_required = max(1, employees // 2)
            max_required = employees - 1
            req_values.append(random.randint(min_required, max_required))

    writer.writerow(req_headers)
    writer.writerow(req_values)

    # --- 2. Nagłówki preferencji ---
    pref_headers = [f"pref_{day}d_{shift}s" 
                    for day in range(1, days + 1) 
                    for shift in range(1, shifts + 1)]
    writer.writerow(pref_headers)

    # --- 3. Preferencje pracowników (każda linia = jeden pracownik) ---
    for emp in range(1, employees + 1):
        row = [random.randint(0, 1) for _ in pref_headers]
        writer.writerow(row)

print("CSV wygenerowano:", filename)


CSV wygenerowano: data_csv\grafik_30d_3s_24emp.csv


In [None]:

import random
import csv
import os

#  data is represented as example
# 1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,0,1,1,0,1
# for 3 days and 3 employe
# first part (1,1,1) stands for 1 day preferences of each employee
# (3,1,3) stands for company requirments for each day
# (1,0,1) last part for 3 day assigned is employe  1 and 3

# Parametry
days = 3
employees = 3
sample = 7
max_tries = 1000
name = f"grafik_{sample}_{days}x{employees}.csv"
folder = "data_csv"
FileName = os.path.join(folder, name)

os.makedirs(folder, exist_ok=True)

def generate_preferences_for_employees(num_emps=employees, num_days=days):
    return [[random.randint(0, 1) for _ in range(num_days)] for _ in range(num_emps)]

def generate_schedule_from_preferences(preferences, firm_requirements, num_days=days):
    num_emps = len(preferences)
    schedule = [[0 for _ in range(num_emps)] for _ in range(num_days)]

    for e in range(num_emps):
        available_days = [d for d in range(num_days) if preferences[e][d] == 1]
        if len(available_days) < firm_requirements[e]:
            return None
        chosen = random.sample(available_days, firm_requirements[e])
        for d in chosen:
            schedule[d][e] = 1

    return schedule

def is_schedule_valid(schedule, firm_requirements):
    if schedule is None:
        return False
    num_days = len(schedule)
    num_emps = len(schedule[0])
    col_sums = [sum(schedule[d][e] for d in range(num_days)) for e in range(num_emps)]
    return col_sums == firm_requirements

def generate_dataset_csv(filename=FileName, num_samples=sample, num_emps=employees, num_days=days):
    with open(filename, "w", newline="") as csvfile:
        writer = csv.writer(csvfile)

        header = ["sample"]
        for d in range(num_days):
            for e in range(num_emps):
                header.append(f"emp{e}_pref{d}")
        header.extend([f"req_worker{e}" for e in range(num_emps)])
        for d in range(num_days):
            for e in range(num_emps):
                header.append(f"set_day{d}_emp{e}")
        header.append("mismatch_count")
        writer.writerow(header)

        samples_generated = 0
        attempts = 0

        while samples_generated < num_samples and attempts < max_tries * num_samples:
            preferences = generate_preferences_for_employees(num_emps, num_days)
            firm_requirements = [random.randint(1, num_days) for _ in range(num_emps)]

            if any(sum(preferences[e]) < firm_requirements[e] for e in range(num_emps)):
                attempts += 1
                continue

            schedule = generate_schedule_from_preferences(preferences, firm_requirements, num_days)
            if schedule is not None and is_schedule_valid(schedule, firm_requirements):
                row = [samples_generated]

                # Flatten preferences
                flat_preferences = [preferences[e][d] for d in range(num_days) for e in range(num_emps)]
                row.extend(flat_preferences)

                row.extend(firm_requirements)

                # Flatten schedule and calculate mismatch
                flat_schedule = [schedule[d][e] for d in range(num_days) for e in range(num_emps)]
                mismatch_count = sum(
                    1 for pref, sched in zip(flat_preferences, flat_schedule) if pref != sched
                )

                row.extend(flat_schedule)
                row.append(mismatch_count)

                writer.writerow(row)
                samples_generated += 1
            attempts += 1

        print(f"Generated {samples_generated} samples after {attempts} attempts.")
        print(f"File saved: {filename}")

generate_dataset_csv()

def sum_mismatch_in_file(filename):
    total_mismatch = 0
    with open(filename, "r", newline="") as csvfile:
        reader = csv.reader(csvfile)
        header = next(reader)

        mismatch_index = len(header) - 1  # ostatnia kolumna to mismatch_count

        for row in reader:
            total_mismatch += int(row[mismatch_index])

    sum_val=sample/total_mismatch
    print(f"Sum of mismatch_count {total_mismatch}")
    print(f"Mean value sample/total_mismatch {sum_val}")
    return None

sum_mismatch_in_file(FileName)

# Neural Network 

In [3]:
import pandas as pd
import numpy as np
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Input, Concatenate
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow.keras.backend as K
import tensorflow as tf
from sklearn.model_selection import KFold
import os


filetoread="grafik_100_3x3.csv"
crossvalidationK=10


data = pd.read_csv(filetoread)
nameof_file_prediction=f"predykcja_{filetoread}"
folder="data_csv"
resultofprediction =os.path.join(folder,nameof_file_prediction)

X_cols = [
    "emp0_pref0day", "emp0_pref1day", "emp0_pref2day",
    "emp1_pref0day", "emp1_pref1day", "emp1_pref2day",
    "emp2_pref0day", "emp2_pref0day", "emp2_pref0day",
    "req_worker0", "req_worker1", "req_worker2"
]

Y_cols = [
    "set_day0_emp0", "set_day0_emp1", "set_day0_emp2",
    "set_day1_emp0", "set_day1_emp1", "set_day1_emp2",
    "set_day2_emp0", "set_day2_emp1", "set_day2_emp2"
]

X = data[X_cols].values.astype(np.float32)
Y = data[Y_cols].values.astype(np.float32)

X_pref = X[:, :9]
X_req = X[:, 9:]

# Definicja niestandardowego modelu
class CustomModel(Model):
    def train_step(self, data):
        (x, req_tensor), y_true = data
        with tf.GradientTape() as tape:
            y_pred = self([x, req_tensor], training=True)

            y_pred_reshaped = tf.reshape(y_pred, (-1, 3, 3))
            assigned = tf.reduce_sum(y_pred_reshaped, axis=2)
            penalty = tf.reduce_mean(tf.abs(assigned - req_tensor), axis=1)

            bce = tf.reduce_mean(tf.keras.losses.binary_crossentropy(y_true, y_pred), axis=-1)
            total_loss = tf.reduce_mean(bce + 0.5 * penalty)

        gradients = tape.gradient(total_loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        self.compiled_metrics.update_state(y_true, y_pred)
        return {m.name: m.result() for m in self.metrics}

# Cross-walidacja K-Fold
kf = KFold(n_splits=crossvalidationK, shuffle=True, random_state=42)
val_accuracies = []

for fold, (train_index, test_index) in enumerate(kf.split(X)):
    print(f"\n--- Fold {fold + 1}/{crossvalidationK} ---")

    X_pref_train, X_pref_val = X_pref[train_index], X_pref[test_index]
    X_req_train, X_req_val = X_req[train_index], X_req[test_index]
    Y_train, Y_val = Y[train_index], Y[test_index]

    input_main = Input(shape=(9,), name='preferences')
    input_req = Input(shape=(3,), name='requirements')

    x = Concatenate()([input_main, input_req])
    x = Dense(32, activation='relu')(x)
    x = Dense(64, activation='relu')(x)
    output = Dense(9, activation='sigmoid')(x)

    model = CustomModel(inputs=[input_main, input_req], outputs=output)
    model.compile(optimizer=Adam(), loss=tf.keras.losses.BinaryCrossentropy(), metrics=['accuracy'])

    early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True, verbose=0)

    model.fit(
        [X_pref_train, X_req_train],
        Y_train,
        validation_data=([X_pref_val, X_req_val], Y_val),
        epochs=50,
        batch_size=32,
        callbacks=[early_stop],
        verbose=0
    )

    val_loss, val_acc = model.evaluate([X_pref_val, X_req_val], Y_val, verbose=0)
    val_accuracies.append(val_acc)
    print(f"Fold {fold + 1} - Validation accuracy: {val_acc:.4f}")

print(f"\nŚrednia dokładność walidacji (accuracy): {np.mean(val_accuracies):.4f}")

# Przykładowa prognoza
test_pref = np.array([[0, 1, 1, 1, 1, 1, 0, 0, 1]]).astype(np.float32)  # Konwersja na float32
test_req = np.array([[2, 3, 1]]).astype(np.float32)  # Konwersja na float32

# Upewnij się, że przekazujesz dane wejściowe w postaci słownika
prediction = model.predict({'preferences': test_pref, 'requirements': test_req})

y_pred_binary = (prediction > 0.4).astype(int)

print("Binarna prognoza (0 = nie pracuje, 1 = pracuje):")
print(y_pred_binary[0].reshape(3, 3))

print(test_req)

# Prognoza ciągła
print("Predykcja ciągła (wartości zmiennoprzecinkowe):")
print(prediction[0].reshape(3, 3))

# Predykcja na wszystkich 100 próbkach
all_predictions = model.predict({'preferences': X_pref, 'requirements': X_req})
all_predictions_binary = (all_predictions > 0.4).astype(int)

columns = [
    "set_day0_emp0", "set_day0_emp1", "set_day0_emp2",
    "set_day1_emp0", "set_day1_emp1", "set_day1_emp2",
    "set_day2_emp0", "set_day2_emp1", "set_day2_emp2"
]

df_cont = pd.DataFrame(all_predictions, columns=columns)
df_cont.to_csv(resultofprediction, index_label="sample")


--- Fold 1/10 ---


```
for metric in self.metrics:
    metric.update_state(y, y_pred)
```

  return self._compiled_metrics_update_state(


Fold 1 - Validation accuracy: 0.3000

--- Fold 2/10 ---
Fold 2 - Validation accuracy: 0.2000

--- Fold 3/10 ---
Fold 3 - Validation accuracy: 0.1000

--- Fold 4/10 ---
Fold 4 - Validation accuracy: 0.2000

--- Fold 5/10 ---
Fold 5 - Validation accuracy: 0.0000

--- Fold 6/10 ---
Fold 6 - Validation accuracy: 0.2000

--- Fold 7/10 ---
Fold 7 - Validation accuracy: 0.3000

--- Fold 8/10 ---
Fold 8 - Validation accuracy: 0.0000

--- Fold 9/10 ---
Fold 9 - Validation accuracy: 0.4000

--- Fold 10/10 ---
Fold 10 - Validation accuracy: 0.3000

Średnia dokładność walidacji (accuracy): 0.2000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step
Binarna prognoza (0 = nie pracuje, 1 = pracuje):
[[0 1 0]
 [1 1 1]
 [1 0 0]]
[[2. 3. 1.]]
Predykcja ciągła (wartości zmiennoprzecinkowe):
[[0.11137709 0.92319953 0.39152524]
 [0.9054446  0.97797245 0.6122471 ]
 [0.47718436 0.19971299 0.34023464]]
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step


In [None]:
asd

# not working yet larger nn

In [19]:
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import accuracy_score

# Wczytanie danych z CSV
data = pd.read_csv("grafik_2000_3x3.csv")

# Kolumny wejściowe (preferencje i wymagania)
X_cols = [
    "emp0_pref0", "emp0_pref1", "emp0_pref2",
    "emp1_pref0", "emp1_pref1", "emp1_pref2",
    "emp2_pref0", "emp2_pref1", "emp2_pref2",
    "req_worker0", "req_worker1", "req_worker2"
]

Y_cols = [
    "day0_emp0", "day0_emp1", "day0_emp2",
    "day1_emp0", "day1_emp1", "day1_emp2",
    "day2_emp0", "day2_emp1", "day2_emp2"
]

# Przygotowanie danych
X = data[X_cols].values
Y = data[Y_cols].values

model = Sequential()
model.add(Dense(128, activation='relu', input_shape=(20,)))
model.add(Dense(256, activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dense(16, activation='sigmoid'))

# Early stopping
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True
)

model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])

# Trening modelu
model.fit(X, Y, epochs=50, batch_size=32, validation_split=0.2, callbacks=[early_stop])

test_input = np.array([[0,1,1,1,1,1,0,0,1,2,3,1]])  # 12 wartości
prediction = model.predict(test_input)
y_pred_binary = (prediction > 0.5).astype(int)

print("Binarna prognoza (0 = nie pracuje, 1 = pracuje):")
print(y_pred_binary[0].reshape(3, 3))

print("Przewidywany harmonogram (3 dni × 3 pracowników):")
print(test_input)
print(prediction[0].reshape(3, 3))
# wiersz oznacza dzien kolumna przewidywania danego pracownika


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 17ms/step - accuracy: 0.1778 - loss: 0.6884 - val_accuracy: 0.2700 - val_loss: 0.6302
Epoch 2/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.2907 - loss: 0.6121 - val_accuracy: 0.2100 - val_loss: 0.5414
Epoch 3/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.2392 - loss: 0.5240 - val_accuracy: 0.2550 - val_loss: 0.4492
Epoch 4/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.2068 - loss: 0.4401 - val_accuracy: 0.2525 - val_loss: 0.3879
Epoch 5/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.2079 - loss: 0.3873 - val_accuracy: 0.2075 - val_loss: 0.3536
Epoch 6/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.2037 - loss: 0.3502 - val_accuracy: 0.1725 - val_loss: 0.3259
Epoch 7/50
[1m50/50[0m [32m━━━━━━━━━