In [None]:
import pandas as pd

# Show the current maximum number of displayed columns. A bare expression is
# only echoed by the notebook when it is the last statement of a cell, so the
# value is printed explicitly here.
print("display.max_columns (before):", pd.get_option("display.max_columns"))

# Set the maximum number of displayed columns (50 here).
pd.set_option('display.max_columns', 50)

In [None]:
# Read the input CSV and keep the loaded frame as `master_df`; the working
# frame `df` is an independent (deep) copy of it.
file_path = 'input.csv'
master_df = pd.read_csv(filepath_or_buffer=file_path)
df = master_df.copy(deep=True)
df

In [None]:
def classify_record(row):
    """Classify one record as "routine" or "non-routine".

    A record is "routine" when its "Issue Type" starts with "AAAA" or "BBBB"
    and none of its non-missing "Comment1" .. "Comment24" values contains
    "CCCC" or "DDDD". Every other record, including one whose "Issue Type"
    is missing, is "non-routine".

    Args:
        row: Record indexable by column label (e.g. the row Series passed by
            ``df.apply(classify_record, axis=1)``) holding "Issue Type" and
            "Comment1" .. "Comment24".

    Returns:
        str: "routine" or "non-routine".

    Raises:
        KeyError: If "Issue Type" or any of the 24 comment labels is absent
            from a pandas row.
    """
    routine_prefixes = ("AAAA", "BBBB")
    non_routine_markers = ("CCCC", "DDDD")

    issue_type = row["Issue Type"]

    # Keep only non-missing comments. Values are converted to str so that a
    # comment column parsed as numbers does not raise TypeError on `in`.
    comments = [
        str(value)
        for value in (row[f"Comment{i}"] for i in range(1, 25))
        if not pd.isna(value)
    ]

    # A missing Issue Type cannot match a routine prefix; without this guard
    # the float NaN would raise AttributeError on .startswith().
    if pd.isna(issue_type):
        return "non-routine"

    # NOTE(review): the earlier comment described issue types "containing"
    # AAAA/BBBB, but the code has always matched on the prefix only. The
    # prefix behavior is kept here — confirm which one is intended.
    has_routine_prefix = str(issue_type).startswith(routine_prefixes)
    has_marker = any(
        marker in comment for comment in comments for marker in non_routine_markers
    )

    return "routine" if has_routine_prefix and not has_marker else "non-routine"

In [None]:
# Label every record by running classify_record over the frame row by row.
df["Type"] = df.apply(classify_record, axis="columns")

In [None]:
# Display the working frame again (it now carries the "Type" column).
df

In [None]:
import matplotlib.pyplot as plt

def _plot_issue_type_counts(ax, counts, title):
    """Draw `counts` as a horizontal bar chart on `ax`, labelling each bar with its value.

    Args:
        ax: Matplotlib Axes to draw on.
        counts: Series of counts indexed by Issue Type (a value_counts() result).
        title: Title of the panel.
    """
    if counts.empty:
        # Nothing to draw for this group: keep the panel but say so instead of
        # asking pandas to plot an empty Series.
        ax.text(0.5, 0.5, "no records", ha="center", va="center", transform=ax.transAxes)
    else:
        counts.plot(kind='barh', ax=ax)
        # Bars are drawn in Series order, so the i-th count sits at y == i.
        for i, count in enumerate(counts):
            ax.text(count, i, str(count), va='center', fontsize=12, color='black')

    # Labels are set after plotting so they win over anything pandas set.
    ax.set_title(title)
    ax.set_xlabel('Count')
    ax.set_ylabel('Issue Type')


# Issue Type breakdown per class (.loc avoids chained indexing).
routine_issue_counts = df.loc[df["Type"] == "routine", "Issue Type"].value_counts()
non_routine_issue_counts = df.loc[df["Type"] == "non-routine", "Issue Type"].value_counts()

# One figure, two panels: "routine" on the left, "non-routine" on the right.
fig, (ax_routine, ax_non_routine) = plt.subplots(1, 2, figsize=(10, 6))
_plot_issue_type_counts(ax_routine, routine_issue_counts, 'Routine - Issue Type Count')
_plot_issue_type_counts(ax_non_routine, non_routine_issue_counts, 'Non-routine - Issue Type Count')

fig.tight_layout()
plt.show()


In [None]:
# Drill into the non-routine records whose Issue Type is 'WhiteListing IN'.
df.loc[df['Type'].eq('non-routine') & df['Issue Type'].eq('WhiteListing IN')]

In [None]:
# Histogram of "Actual Hours", drawn side by side for "routine" and
# "non-routine" records.
N_BINS = 30  # single source for both the binning and the figure title
group_labels = ["routine", "non-routine"]

# Missing hours are dropped explicitly so that the result does not depend on
# how the installed matplotlib/numpy treat NaN during bin-range detection.
hours_by_group = [
    df.loc[df["Type"] == label, "Actual Hours"].dropna().values
    for label in group_labels
]

fig, ax = plt.subplots(figsize=(10, 6))
n, bins, patches = ax.hist(hours_by_group, bins=N_BINS, alpha=0.7, label=group_labels)

ax.set_title(f"Actual Hours: routine vs. non-routine(bins={N_BINS})")
ax.set_xlabel("Actual Hours")
ax.set_ylabel("Count")
ax.legend()

# Write each bar's count just above it, centred on the bar.
for group_counts, group_bars in zip(n, patches):
    for count, bar in zip(group_counts, group_bars):
        bar_center = bar.get_x() + 0.5 * bar.get_width()
        ax.annotate(f"{int(count)}", (bar_center, count), ha="center", va="bottom")

fig.tight_layout()
plt.show()

In [None]:
# Write `df` to output.csv, omitting the row index.
df.to_csv(path_or_buf='output.csv', index=False)