In [1]:
from google.colab import files
files.upload() # kaggle.jsonをアップロード
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 /root/.kaggle/kaggle.json

Saving kaggle.json to kaggle.json


In [4]:
!pip install category_encoders
import pandas as pd
# Import category_encoders
import category_encoders as cate_enc
# Import the neural-network classifier class
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

# データの準備
def prepare():
  !kaggle datasets download -d \
  cms/hospital-general-information
  !unzip hospital-general-information.zip

def preprocess():
    """Load HospInfo.csv and build one-hot features plus integer rating labels.

    Reads ``HospInfo.csv`` from the current working directory, keeps only the
    rows whose 'Hospital overall rating' is one of '1'..'5', keeps only the
    feature columns and the rating column, and one-hot encodes the features.

    Returns:
        ndf: DataFrame of one-hot encoded feature columns (rating column
            removed); it keeps the row index of the filtered CSV rows.
        y: 1-D NumPy integer array of class ids, where rating '1' -> 0, ...,
            '5' -> 4.
        ratings: list of the rating labels ['1', ..., '5'], ordered so that
            ``ratings[class_id]`` is the label of that class.
    """
    target = 'Hospital overall rating'
    df = pd.read_csv('HospInfo.csv')
    print(df)
    # Hospital attributes used as input features
    features = ['City', 'State',
                'County Name', 'Hospital Type',
                'Emergency Services',
                'Meets criteria for meaningful use of EHRs',
                'Mortality national comparison',
                'Safety of care national comparison',
                'Readmission national comparison',
                'Patient experience national comparison',
                'Effectiveness of care national comparison',
                'Timeliness of care national comparison',
                'Efficient use of medical imaging national comparison']
    ratings = ['1', '2', '3', '4', '5']
    # Rating label -> zero-based class id ('1' -> 0, ..., '5' -> 4)
    mp = {label: class_id for class_id, label in enumerate(ratings)}
    # Keep features + target in the CSV's own column order, so the encoded
    # column order (and therefore the feature matrix) is unchanged.
    keep = [c for c in df.columns if c in features or c == target]
    # Drop rows without a 1-5 rating. The explicit copy makes the assignment
    # below act on an independent frame instead of a slice of the original.
    df = df.loc[df[target].isin(ratings), keep].copy()
    # Assign the mapped column back rather than calling replace(inplace=True)
    # on a column selection, which is chained assignment and does not update
    # the frame under pandas copy-on-write.
    df[target] = df[target].map(mp).astype(int)
    # One-hot encoding. 'value' is the documented category_encoders 2.x option
    # (the old 'impute' spelling is a 1.x name).
    ohe = cate_enc.OneHotEncoder(cols=features,
                                 handle_unknown='value')
    ndf = ohe.fit_transform(df)
    # The hospital rating is the prediction target
    y = ndf[target].to_numpy()
    ndf = ndf.drop(columns=[target])
    return ndf, y, ratings

def main():
    """Run the pipeline: fetch data, encode features, train and evaluate an MLP.

    Calls prepare() and preprocess(), prints two slices of the one-hot encoded
    frame for inspection, splits the data 70/30 into train and test sets, fits
    an MLPClassifier on the training part and prints a classification report
    for the test part.
    """
    prepare()
    ndf, y, ratings = preprocess()
    # Inspect the one-hot encoding result.
    print(ndf.iloc[:, :5].head())
    # NOTE(review): 3565:3570 is a hard-coded position specific to this
    # dataset; in the recorded run it shows columns from 'County Name_*' to
    # 'Emergency Services_*'. On other data the slice may be empty.
    print(ndf.iloc[:, 3565:3570].head())
    # Split into training and test data, then train the neural network on the
    # rating and predict it for the test set.
    X = ndf.to_numpy()
    X_tr, X_te, y_tr, y_te = train_test_split(
        X, y, random_state=0, train_size=0.7)
    clf = MLPClassifier(solver='adam', alpha=1e-5,
                        hidden_layer_sizes=(100,),
                        activation='tanh',
                        random_state=1, max_iter=3000)
    clf.fit(X_tr, y_tr)
    y_pre = clf.predict(X_te)
    # Pass labels explicitly: class ids are 0..len(ratings)-1 (see the mapping
    # in preprocess). Without labels, classification_report raises when the
    # test split or predictions do not contain every class, because the number
    # of target_names no longer matches the classes it infers.
    print(classification_report(
        y_te, y_pre,
        labels=list(range(len(ratings))),
        target_names=ratings, zero_division=1))

# Entry point: run the whole pipeline. In a notebook kernel __name__ is
# '__main__', so executing this cell calls main() immediately.
if __name__ == '__main__':
    main()


Downloading hospital-general-information.zip to /content
  0% 0.00/355k [00:00<?, ?B/s]
100% 355k/355k [00:00<00:00, 51.5MB/s]
Archive:  hospital-general-information.zip
  inflating: HospInfo.csv            
      Provider ID  ...                                           Location
0           10005  ...             2505 U S HIGHWAY 431 NORTH\nBOAZ, AL\n
1           10012  ...             200 MED CENTER DRIVE\nFORT PAYNE, AL\n
2           10032  ...               209 NORTH MAIN STREET\nWEDOWEE, AL\n
3           10095  ...                 508 GREEN STREET\nGREENSBORO, AL\n
4           10131  ...               ONE HOSPITAL DR SE\nHUNTSVILLE, AL\n
...           ...  ...                                                ...
4807       450617  ...  500 MEDICAL CENTER BLVD\nWEBSTER, TX\n(29.5410...
4808       520194  ...  475 W RIVER WOODS PKWY\nGLENDALE, WI\n(43.0982...
4809       491302  ...  159 HARTLEY WAY\nPEARISBURG, VA\n(37.332697, -...
4810       510012  ...  2520 VALLEY DRIVE\nPOINT PLE

  elif pd.api.types.is_categorical(cols):


   City_1  City_2  City_3  City_4  City_5
0       1       0       0       0       0
1       0       1       0       0       0
2       0       0       1       0       0
4       0       0       0       1       0
6       0       0       0       0       1
   County Name_1239  ...  Emergency Services_2
0                 0  ...                     0
1                 0  ...                     0
2                 0  ...                     0
4                 0  ...                     0
6                 0  ...                     0

[5 rows x 5 columns]
              precision    recall  f1-score   support

           1       0.38      0.54      0.44        41
           2       0.52      0.47      0.49       193
           3       0.68      0.66      0.67       537
           4       0.57      0.60      0.58       275
           5       0.26      0.36      0.31        25

    accuracy                           0.60      1071
   macro avg       0.48      0.52      0.50      1071
weighted a