# txt (print 在 console 會是正的)


In [1]:
import os
# Make sure the text output directory exists. exist_ok=True replaces the
# check-then-create pattern, which could fail if the directory appeared
# between the existence test and the makedirs call.
os.makedirs('./output/txt_out', exist_ok=True)

In [2]:
import os
import pandas as pd


def len_with_chinese(s):
    """Return the display width of *s* for a monospaced text table.

    Every character whose code point is above 127 counts as two columns;
    every other character counts as one.
    """
    return sum(2 if ord(ch) > 127 else 1 for ch in s)


def series_max_len(series, withColumn=True):
    """Return the entry of *series* with the largest display width.

    Widths are measured with ``len_with_chinese``. On ties the earliest
    value wins. When *withColumn* is true, the series name is returned
    instead if it is strictly wider than every value. Returns ``None`` when
    there are no values and the name does not take over.
    """
    widest = None
    widest_width = 0
    for candidate in series.values:
        # The first value is always accepted, even when its width is 0.
        if widest is None:
            widest = candidate
            widest_width = len_with_chinese(candidate)
            continue
        width = len_with_chinese(candidate)
        if width > widest_width:
            widest = candidate
            widest_width = width

    if withColumn and len_with_chinese(series.name) > widest_width:
        widest = series.name

    return widest


def make_txt_tabl(df):
    """Render *df* as a bordered plain-text table and return it as a string.

    Each column is as wide as its widest entry (header included), measured
    with ``len_with_chinese``, plus one space of padding on each side. Cell
    text is centred; when the padding is odd the extra space goes on the
    right. Headers and cell values are concatenated directly, so they must
    already be strings.
    """
    widths = [
        len_with_chinese(series_max_len(df[name])) for name in df.columns
    ]

    # Horizontal rule shared by the top, bottom and every row separator.
    border = "".join("+" + "-" * (width + 2) for width in widths) + "+\n"

    def centred_cell(text, width):
        # One table cell: leading "|" followed by the centred text.
        padding = width + 2 - len_with_chinese(text)
        left = padding // 2
        right = padding - left
        return "|" + left * " " + text + right * " "

    pieces = [border]

    # Header row.
    for position, name in enumerate(df.columns):
        pieces.append(centred_cell(name, widths[position]))
    pieces.append("|\n")

    # Data rows, each preceded by a rule.
    row_count, column_count = df.shape
    for row in range(row_count):
        pieces.append(border)
        for col in range(column_count):
            pieces.append(centred_cell(df.iloc[row, col], widths[col]))
        pieces.append("|\n")
    pieces.append(border)

    return "".join(pieces)


# main: render every simplified CSV as a text table, print it and save it.
files = [
    "106_simplified.csv",
    "107_simplified.csv",
    "108_simplified.csv",
    "109_simplified.csv",
    "110_simplified.csv",
]

for file in files:
    path = os.path.join("data", file)
    df = pd.read_csv(path)

    # Round the rate columns to 4 decimals, then turn every displayed column
    # into text because make_txt_tabl concatenates cell values as strings.
    for rate_column in ("延修率", "休學率", "退學率"):
        df[rate_column] = df[rate_column].round(4).astype(str)
    df["排名"] = df["排名"].astype(str)

    print(file.center(40, "="))
    table = make_txt_tabl(df)
    print(table)

    # Save the rendered table next to the other text outputs.
    out_path = f"./output/txt_out/{file}.txt"
    with open(out_path, "w+", encoding="utf8") as f:
        f.write(table)

+------------------+---------+--------+--------+--------+
|     學校名稱     |  排名   | 延修率 | 休學率 | 退學率 |
+------------------+---------+--------+--------+--------+
|     中原大學     | 601-800 | 0.0408 | 0.0812 | 0.0483 |
+------------------+---------+--------+--------+--------+
|   中國醫藥大學   | 501-600 | 0.0149 | 0.0674 | 0.0441 |
+------------------+---------+--------+--------+--------+
|     亞洲大學     | 601-800 | 0.0326 | 0.0546 | 0.0908 |
+------------------+---------+--------+--------+--------+
|     元智大學     | 601-800 | 0.0358 | 0.044  | 0.0502 |
+------------------+---------+--------+--------+--------+
|   國立中央大學   | 601-800 | 0.0194 | 0.0787 | 0.0454 |
+------------------+---------+--------+--------+--------+
|   國立中山大學   | 601-800 | 0.0261 | 0.1018 | 0.0563 |
+------------------+---------+--------+--------+--------+
|   國立中正大學   | 601-800 | 0.0229 | 0.0929 | 0.049  |
+------------------+---------+--------+--------+--------+
|   國立中興大學   | 601-800 | 0.0218 | 0.0936 | 0.0548 |
+------------

# html


In [3]:
import os
# Make sure the HTML output directory exists. exist_ok=True replaces the
# check-then-create pattern, which could fail if the directory appeared
# between the existence test and the makedirs call.
os.makedirs('./output/html_out', exist_ok=True)

In [4]:
import pandas as pd
import os


def make_html(df):
    """Render *df* as a single-line HTML ``<table>`` string.

    The first row holds the column names in ``<th>`` cells; every DataFrame
    row follows as a ``<tr>`` of ``<td>`` cells. Values are formatted with
    their default string formatting and HTML-escaped, so text containing
    ``&``, ``<`` or ``>`` cannot break the markup.

    Args:
        df: The pandas DataFrame to render.

    Returns:
        The HTML markup as one string with no line breaks.
    """
    # Local import keeps this function self-contained.
    from html import escape

    def as_text(value):
        # quote=False: quotes are harmless inside element content.
        return escape(f"{value}", quote=False)

    parts = ["<table border=1>"]

    # Header row.
    parts.append("<tr>")
    parts.extend(f"<th>{as_text(column)}</th>" for column in df.columns)
    parts.append("</tr>")

    # Data rows.
    row_count, column_count = df.shape
    for y in range(row_count):
        parts.append("<tr>")
        parts.extend(
            f"<td>{as_text(df.iloc[y, x])}</td>" for x in range(column_count)
        )
        # Close the row; the previous version emitted a second "<tr>" here.
        parts.append("</tr>")

    parts.append("</table>")

    return "".join(parts)


# main: render every simplified CSV as an HTML table and save it.
files = [
    "106_simplified.csv",
    "107_simplified.csv",
    "108_simplified.csv",
    "109_simplified.csv",
    "110_simplified.csv",
]

for file in files:
    path = os.path.join("data", file)
    df = pd.read_csv(path)

    # Round the rate columns to 4 decimals and store them, like the ranking
    # column, as text.
    for rate_column in ("延修率", "休學率", "退學率"):
        df[rate_column] = df[rate_column].round(4).astype(str)
    df["排名"] = df["排名"].astype(str)

    dom = make_html(df)

    # Save the markup next to the other HTML outputs.
    out_path = f"./output/html_out/{file}.html"
    with open(out_path, "w+", encoding="utf8") as f:
        f.write(dom)

# matplotlib


# 有排名和沒排名學校延修率,休學率,退學率比較


In [5]:
import pandas as pd
import matplotlib.pyplot as plt

# Build one summary row per academic year (106-110): the mean per-school
# delayed-graduation, suspension and withdrawal rates, computed separately
# for schools that appear in that year's ranking file and for those that
# do not.
df = pd.DataFrame(columns=["年分", "有排名學校延修率", "有排名學校休學率",
                  "有排名學校退學率", "無排名學校延修率", "無排名學校休學率", "無排名學校退學率"])

# Result columns in the same order as the per-school rate tuple built below
# (delay, suspension, withdrawal).
ranked_keys = ("有排名學校延修率", "有排名學校休學率", "有排名學校退學率")
unranked_keys = ("無排名學校延修率", "無排名學校休學率", "無排名學校退學率")

for year in range(106, 111):
    # Schools that appear in this year's ranking.
    ranking_school = pd.read_csv(f"./data/{year}_taiwan_ranking.csv")
    target_school = ranking_school["name"].tolist()
    # Set copy for O(1) membership tests inside the row loop.
    ranked_names = set(target_school)

    # All Taiwanese schools for this year.
    taiwan_school = pd.read_csv(f"./data/{year}.csv")

    d = {column: 0 for column in df.columns}
    d["年分"] = year
    ranking_count = 0
    no_ranking_count = 0

    for y in range(len(taiwan_school)):
        # Columns are addressed by position: 0 = school name, 1 = total
        # students, 4 / 13 / 29 = the delay / suspension / withdrawal totals
        # (presumably; confirm against the CSV header).
        total_student = taiwan_school.iloc[y, 1]
        if total_student <= 0:
            # Skip schools without students to avoid dividing by zero.
            continue
        school_name = taiwan_school.iloc[y, 0]
        rates = (
            taiwan_school.iloc[y, 4] / total_student,
            taiwan_school.iloc[y, 13] / total_student,
            taiwan_school.iloc[y, 29] / total_student,
        )
        if school_name in ranked_names:
            ranking_count += 1
            keys = ranked_keys
        else:
            no_ranking_count += 1
            keys = unranked_keys
        for key, rate in zip(keys, rates):
            d[key] += rate

    # Turn the sums into means. A group with no schools yields NaN instead
    # of raising ZeroDivisionError.
    for keys, count in ((ranked_keys, ranking_count),
                        (unranked_keys, no_ranking_count)):
        for key in keys:
            d[key] = d[key] / count if count else float("nan")

    df.loc[len(df)] = d
print(df)
    
    


FileNotFoundError: [Errno 2] No such file or directory: './data/106_taiwan_ranking.csv'