In [23]:
import pandas as pd
import glob
import os

# Directory containing the per-sheet CSV exports (machine-specific path)
csv_dir = 'C:/Users/User/Downloads/'  # adjust as needed

# Sheet-name suffixes: one CSV per entry, joined left-to-right in this order
suffixes = [
    "강번일람_헤더", "강번일람_분석", "강번일람_기별집계",
    "강번일람_종합", "강번일람_휴식시간", "강번일람_주원료",
    "강번일람_EAF부자재", "강번일람_CC부자재"
]

# Filename prefix shared by every per-sheet CSV of this workbook export
base_prefix = "큐슈제강_조업데이터시트_2024년_1월.xlsx - "

# Load each sheet. Rows are paired across sheets purely by position, so every
# frame gets a fresh 0..n-1 index before the column-wise concat.
dfs = []
for suffix in suffixes:
    file_path = os.path.join(csv_dir, f"{base_prefix}{suffix}.csv")
    df = pd.read_csv(file_path).reset_index(drop=True)
    dfs.append(df)

# Positional alignment is only meaningful when all sheets have the same number
# of rows. If they differ, pd.concat(axis=1) pads the shorter sheets with NaN
# and the combined rows no longer line up, so flag it instead of writing a
# silently misaligned file.
row_counts = {name: len(sheet_df) for name, sheet_df in zip(suffixes, dfs)}
if len(set(row_counts.values())) > 1:
    print(f"⚠️  Row counts differ between sheets; rows may be misaligned: {row_counts}")

# Concatenate horizontally (column-wise)
combined_df = pd.concat(dfs, axis=1)

# Save the result; utf-8-sig writes a BOM ahead of the Korean column headers
output_path = os.path.join(csv_dir, "큐슈제강_조업데이터시트_2024년_1월_합본.csv")
combined_df.to_csv(output_path, index=False, encoding='utf-8-sig')

print(f"Saved combined CSV to: {output_path}")

print(f"Number of columns: {combined_df.shape[1]}")

Saved combined CSV to: C:/Users/User/Downloads/큐슈제강_조업데이터시트_2024년_1월_합본.csv
Number of columns: 286


In [24]:
# Months to merge: 1 through 12 with May (5) left out.
# NOTE(review): the reason May is excluded is not recorded here — confirm.
months = [m for m in range(1, 13) if m != 5]

# Filename pattern of the per-month combined CSVs; {} is the month number.
# This rebinds base_prefix, which the earlier cell used for a different
# (per-sheet) filename prefix.
base_prefix = "큐슈제강_조업데이터시트_2024년_{}월_합본.csv"

# Load every monthly file that exists; report the ones that are missing
monthly_dfs = []
for month in months:
    filename = base_prefix.format(month)
    file_path = os.path.join(csv_dir, filename)

    if os.path.exists(file_path):
        df = pd.read_csv(file_path)
        monthly_dfs.append(df)
    else:
        print(f"⚠️  File not found, skipping: {filename}")

# pd.concat fails with an opaque "No objects to concatenate" on an empty list;
# raise the same exception type with a message that names the actual problem.
if not monthly_dfs:
    raise ValueError(
        f"No monthly combined CSV files matching {base_prefix!r} were found "
        f"in {csv_dir!r}; nothing to merge."
    )

# Stack the monthly frames row-wise with a fresh 0..n-1 index
final_df = pd.concat(monthly_dfs, axis=0, ignore_index=True)

# Save the merged CSV; utf-8-sig writes a BOM ahead of the Korean headers
output_path = os.path.join(csv_dir, "큐슈제강_조업데이터시트_2024년_전체합본.csv")
final_df.to_csv(output_path, index=False, encoding='utf-8-sig')

print(f"✅ Merged CSV saved to: {output_path}")


✅ Merged CSV saved to: C:/Users/User/Downloads/큐슈제강_조업데이터시트_2024년_전체합본.csv


In [30]:
filename = "큐슈제강_조업데이터시트_2024년_전체합본.csv"
file_path = os.path.join(csv_dir, filename)

# The dtype mapping asks pandas to load the column keyed by 2 as str.
# NOTE(review): presumably this avoids mixed-type inference on that column — confirm.
df = pd.read_csv(file_path, dtype={2: str})

# Fail early if the column under test is absent
if "로개회수" not in df.columns:
    raise ValueError("Column '로개회수' not found in the DataFrame.")

# Ensure it's numeric (in case it was loaded as string); bad values become NaN
df["로개회수"] = pd.to_numeric(df["로개회수"], errors='coerce')

# A NaN value makes the diff on that row and the next one NaN, and the filter
# below drops NaN diffs. Any break next to such a row would therefore go
# unreported, so state how many rows are affected instead of hiding them.
unparsed_count = int(df["로개회수"].isna().sum())
if unparsed_count:
    print(
        f"⚠️  {unparsed_count} row(s) have a missing or non-numeric '로개회수'; "
        "steps next to them cannot be checked and are not reported."
    )

# Row-to-row step of the counter; a continuous run has a step of exactly 1
df["diff"] = df["로개회수"].diff()

# Rows whose step is defined and is not 1 (the first row has no predecessor,
# so its diff is NaN and it is excluded)
discontinuities = df[(df["diff"].notnull()) & (df["diff"] != 1)]

# Show each discontinuity with the row before and after it for context
for idx in discontinuities.index:
    print(f"\nDiscontinuity at index {idx}:")
    print(df.loc[idx - 1:idx + 1, ["로개회수", "diff"]])

# Optional: save to file for review. utf-8-sig matches the other CSV exports
# in this notebook, which write a BOM ahead of the Korean column headers.
discontinuities.to_csv("로개회수_불연속지점.csv", index=False, encoding='utf-8-sig')


Discontinuity at index 977:
     로개회수   diff
976  5826    1.0
977  6089  263.0
978  6090    1.0
