#Setup


General Imports


In [0]:
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import math

np.random.seed(49)

Google Drive Mounting

In [2]:
from google.colab import drive 
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


Establish the working directory

In [0]:
WORK_DIR = '/content/drive/My Drive/Colab Notebooks/CSCI 4750: Final Project'
# WORK_DIR='/content/drive/My Drive/Colab Notebooks/csci5750/projects/will'
%pwd
os.chdir(WORK_DIR)
## Establish a directory to save figures in ##
os.makedirs("figures",exist_ok=True)
FIGURES_PATH = os.path.join(WORK_DIR,"figures")

## Establish a directory to save tables in ##
os.makedirs("tables",exist_ok=True)
TABLES_PATH = os.path.join(WORK_DIR,"tables")

## Establish a directory to save the data in ##
os.makedirs("datasets",exist_ok=True)
DATA_PATH = os.path.join(WORK_DIR,"datasets")

In [4]:

## Establish the dataset URL ##
DATASET_URL = "http://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/"

# Ignore warnings
import warnings
warnings.filterwarnings(action="ignore", message="^internal gelsd")

#Check current directory
%pwd 

'/content/drive/My Drive/Colab Notebooks/CSCI 4750: Final Project'

SKLearn Imports

In [0]:
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer


from sklearn.preprocessing import Binarizer, binarize, MinMaxScaler, Normalizer, StandardScaler

from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

from sklearn.model_selection import train_test_split, GridSearchCV

from sklearn.metrics import make_scorer,accuracy_score,precision_score,recall_score,classification_report, balanced_accuracy_score

from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import RandomForestClassifier

Data Retrevial Function

Only 14 attributes used:

 3 age: age in years

 4 sex: sex (1 = male; 0 = female)

 9 cp: chest pain type

   -- Value 1: typical angina
   -- Value 2: atypical angina
   -- Value 3: non-anginal pain
   -- Value 4: asymptomatic

 10 trestbps: resting blood pressure (in mm Hg on admission to the hospital)

 12 chol: serum cholestoral in mg/dl

 16 fbs: (fasting blood sugar > 120 mg/dl) (1 = true; 0 = false)

 19  restecg: resting electrocardiographic results

   -- Value 0: normal
   -- Value 1: having ST-T wave abnormality (T wave inversions and/or ST elevation or depression of > 0.05 mV)
   -- Value 2: showing probable or definite left ventricular hypertrophy by Estes' criteria
 32 thalach: maximum heart rate achieved

 38 exang: exercise induced angina (1 = yes; 0 = no)

 40 oldpeak = ST depression induced by exercise relative to rest

 41 slope: the slope of the peak exercise ST segment

   -- Value 1: upsloping
   -- Value 2: flat
   -- Value 3: downsloping
 44 ca: number of major vessels (0-3) colored by flourosopy

 51 thal: 3 = normal; 6 = fixed defect; 7 = reversable defect

 58 num: diagnosis of heart disease (angiographic disease status)

   -- Value 0: < 50% diameter narrowing
   -- Value 1: > 50% diameter narrowing

In [0]:
frame_labels = ["age","sex","cp","trestbps","chol","fbs","restecg","thalach","exang","oldpeak","slope","ca","thal","num"]

def fetch_data(fileName,data_url=DATASET_URL,save_data_path = DATA_PATH):
  tempDataPath = os.path.join(save_data_path,fileName)
  tempFrame = pd.read_table(filepath_or_buffer=data_url,delimiter=",",header=None,na_values='?')
  tempFrame.columns = frame_labels
  tempFrame.to_csv(path_or_buf=tempDataPath)
  return tempFrame


Plotting Functions and Setup

In [0]:
%matplotlib inline

def saveFigure(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    path = os.path.join(FIGURES_PATH, fig_id + "." + fig_extension)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format=fig_extension, dpi=resolution);

Dataframe to CSV Saver

In [0]:
def save_DataFrame_as_csv(df,frameName):
  path = os.path.join(TABLES_PATH,frameName+".csv")
  df.to_csv(path_or_buf=path)

#Data Formatting

Import Data

In [0]:
raw_cleveland = fetch_data(fileName = "cleveland.csv",data_url="http://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.cleveland.data")
raw_hungarian = fetch_data(fileName = "hungarian.csv",data_url="http://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.hungarian.data")
raw_switzerland = fetch_data(fileName = "switzerland.csv",data_url="http://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.switzerland.data")
raw_va = fetch_data(fileName = "va.csv",data_url="http://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.va.data")

raw_data_sets = [raw_cleveland,raw_hungarian,raw_switzerland,raw_va]

set_names = ["Cleveland_Data","Hungarian_Data","Switzerland_Data","VA_Data","Combined_Data"]

Imputure Function for missing values

In [0]:
def data_imputure_Iterative(localDF):
  data_imputure_Iterative = IterativeImputer(imputation_order="ascending",max_iter=100)
  localSet = data_imputure_Iterative.fit_transform(localDF)
  localSet = pd.DataFrame(localSet)
  localSet.columns = frame_labels
  return (localSet)

Check datasets for NaN data

In [11]:
def check_NaN_in_data(dataset,name="passedSet"):
  # hasNaN = dataset.isnull().any()
  # hasNaN.column = "contains NaN"
  rawNaN = pd.DataFrame()
  rawNaN = pd.DataFrame(dataset.isnull().sum(),columns=["# of NaN"] )
  rawNaN["Percent NaN"]=pd.DataFrame(dataset.isnull().sum()/len(dataset)*100)
  print(name+"'s NaN values: ");
  print(rawNaN)
  save_DataFrame_as_csv(rawNaN,(name+"'s NaN values"))

# check_NaN_in_data(raw_data_sets[0],set_names[0])
for x in range(4):
  check_NaN_in_data(raw_data_sets[x],set_names[x])


  ## Add in NaN count

Cleveland_Data's NaN values: 
          # of NaN  Percent NaN
age              0     0.000000
sex              0     0.000000
cp               0     0.000000
trestbps         0     0.000000
chol             0     0.000000
fbs              0     0.000000
restecg          0     0.000000
thalach          0     0.000000
exang            0     0.000000
oldpeak          0     0.000000
slope            0     0.000000
ca               4     1.320132
thal             2     0.660066
num              0     0.000000
Hungarian_Data's NaN values: 
          # of NaN  Percent NaN
age              0     0.000000
sex              0     0.000000
cp               0     0.000000
trestbps         1     0.340136
chol            23     7.823129
fbs              8     2.721088
restecg          1     0.340136
thalach          1     0.340136
exang            1     0.340136
oldpeak          0     0.000000
slope          190    64.625850
ca             291    98.979592
thal           266    90.476190
num         

Histogram Creation Function

In [0]:
def read_save_Histogram(dataSet,setName):
  # print(dataSet.describe())
  dataSet.hist(bins=30, figsize=(40,30),layout=[2,7]);
  figName = setName +"_atribute_histogram_plots"
  saveFigure(figName)

Initial Data Imputation, Scaling, and Visualization

In [0]:
## Impute NaN values ##
imputed_sets = []

for dataSet in raw_data_sets:
  tempFrame = data_imputure_Iterative(dataSet)
  imputed_sets.append(tempFrame)

combined_imputed_data = pd.concat(imputed_sets)
total_imputed_data = data_imputure_Iterative(pd.concat(raw_data_sets))


In [14]:
## Ensure that there are no longer an missing values.
check_NaN_in_data(combined_imputed_data,set_names[4]);

Combined_Data's NaN values: 
          # of NaN  Percent NaN
age              0          0.0
sex              0          0.0
cp               0          0.0
trestbps         0          0.0
chol             0          0.0
fbs              0          0.0
restecg          0          0.0
thalach          0          0.0
exang            0          0.0
oldpeak          0          0.0
slope            0          0.0
ca               0          0.0
thal             0          0.0
num              0          0.0


In [0]:
## Converting the "num" column into a binary class system ##
## 0 = no disease, 1 = disease ##
combined_imputed_data_nonbinary = combined_imputed_data
combined_imputed_data.num=binarize([combined_imputed_data.num],copy=True)[0]


In [0]:
## Saving Histograms ##

## Histograms for imputed sets.
for (dataSet,setName) in zip(imputed_sets,set_names):
  read_save_Histogram(dataSet,setName);
  
## Histogram for the combined set
read_save_Histogram(combined_imputed_data,"Combined_Imputed_Dataset");
read_save_Histogram(total_imputed_data,"Total_Imputed_Dataset");

Correlation and Scatter Matrix

In [0]:
def determine_correlation_with_target_label(dataSet):
  tempCorrMatrix = dataSet.corr();
  return tempCorrMatrix["num"].sort_values(ascending=False)

In [0]:
def find_save_corr(dataSet,setName):
  tempDF = determine_correlation_with_target_label(dataSet)
  save_DataFrame_as_csv(tempDF,setName+"_Correltation_with_'num'")


In [0]:
def find_save_corr_multi(dataSets,setNames,fileName):
  tempCorrList = []
  for data in dataSets:
    tempCorrList.append(determine_correlation_with_target_label(data))
  allCorrDF = pd.DataFrame(tempCorrList)
  allCorrDF.insert(0,"Set Names",setNames,False)
  save_DataFrame_as_csv(allCorrDF,fileName)


In [0]:
## Test of corr saver ##

find_save_corr(combined_imputed_data,"Combined_Imputed_Dataset")

In [0]:
## Test of corr_saver_multi ##

find_save_corr_multi(imputed_sets,set_names[:4],"Imputed_Data_Correltation_with_'num'")

#Setting Up Training, Validation, and Testing Sets

Removing Incomplete Features

In [0]:
# These are removed for high numbers of NaN in their unimputed form; 80%<
pre_processing_data = combined_imputed_data.drop(columns=["ca","thal"])

In [0]:
## Creating Raw Training and Testing Sets

raw_features = pre_processing_data.drop(columns=["num"])
raw_lables =pd.DataFrame(pre_processing_data.pop(('num')))
raw_X_train, raw_X_test, y_train, y_test = train_test_split(raw_features,raw_lables,test_size=0.2,shuffle=True,stratify=raw_lables)

In [0]:
categorical_column_labels =["sex","cp","fbs","restecg"]
continuous_column_labels = ["age","trestbps","chol","thalach","exang","oldpeak","slope"]

categorical_transformer = Pipeline(steps=[('scalar',MinMaxScaler())])
continuous_transformer = Pipeline(steps=[('scalar1',MinMaxScaler())])

final_preprocesser = ColumnTransformer(transformers=[('num',continuous_transformer,continuous_column_labels),('cat',categorical_transformer,categorical_column_labels)],remainder="passthrough")

In [0]:
# Normalization 1 Test#
# minMax = MinMaxScaler()
# minMax_data = pd.DataFrame(minMax.fit_transform(pre_processing_data))
# minMax_data.columns = pre_processing_data.columns
# print(minMax_data)
# print(minMax_data.info())
# read_save_Histogram(minMax_data,"Min_Maxed_AllData")

Splitting processed data into training and testing sets.


In [0]:
## Finalizing the training and testing data ##
X_train = final_preprocesser.fit_transform(raw_X_train)
X_test = final_preprocesser.transform(raw_X_test)

Grid Search Setup

In [0]:
scorers = {
    # 'precision_score': make_scorer(precision_score),
    # 'recall_score': make_scorer(recall_score),
    'balanced_accuracy_score': make_scorer(balanced_accuracy_score)
}

DT_parameters = {
    'criterion':['gini','entropy'],
    'max_depth':[None,5,10,15,20],
    'min_samples_leaf':list(range(1,100,5)),
    'max_features':(None,"auto","sqrt","log2"),
    'ccp_alpha':list(range(0,2))
}

KNN_parameters = {
    'weights':['uniform','distance'],
    'algorithm':['auto','ball_tree','kd_tree','brute','auto'],
    'n_neighbors':list(range(5,20,5)),
    'leaf_size':list(range(5,30,5)),
    'n_jobs':[-1],
}

# SVM_parameters = {
#     'C':list(range(1,100)),
#     'kernel':['linear', 'poly', 'rbf', 'sigmoid', 'precomputed'],
#     'degree':list(range(3,8)),
#     'gamma':['scale','auto'],
#     'shrinking':[True,False],
#     'class_weight':['balanced']

# }

SGD_parameters = {
    'loss':['hinge'],
    'penalty':['l2','l1','elasticnet'],
    'tol':[None,1e-3],
    'fit_intercept':[True,False],
    'shuffle':[True,False],
    'learning_rate':['optimal'],
    'early_stopping':[True],
    'class_weight':[None,'balanced'],
}

RF_parameters = {
    'n_estimators':list(range(100,1000,100)),
    'criterion':['gini','entropy'],
}


In [28]:
## Descision Tree ##
best_DT = GridSearchCV(DecisionTreeClassifier(),DT_parameters,cv=10,scoring='balanced_accuracy',return_train_score=False,n_jobs=-1)
best_DT.fit(X_train,y_train)


GridSearchCV(cv=10, error_score=nan,
             estimator=DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None,
                                              criterion='gini', max_depth=None,
                                              max_features=None,
                                              max_leaf_nodes=None,
                                              min_impurity_decrease=0.0,
                                              min_impurity_split=None,
                                              min_samples_leaf=1,
                                              min_samples_split=2,
                                              min_weight_fraction_leaf=0.0,
                                              presort='deprecated',
                                              random_state=None,
                                              splitter='best'),
             iid='deprecated', n_jobs=-1,
             param_grid={'ccp_alpha': [0, 1], 'criterion': ['gini', 'entropy'],

In [29]:
## KNeighborsClassifier ##
best_KNN = GridSearchCV(KNeighborsClassifier(),KNN_parameters,cv=10,scoring='balanced_accuracy',return_train_score=False,n_jobs=-1)
best_KNN.fit(X_train,y_train)

  self.best_estimator_.fit(X, y, **fit_params)


GridSearchCV(cv=10, error_score=nan,
             estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30,
                                            metric='minkowski',
                                            metric_params=None, n_jobs=None,
                                            n_neighbors=5, p=2,
                                            weights='uniform'),
             iid='deprecated', n_jobs=-1,
             param_grid={'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute',
                                       'auto'],
                         'leaf_size': [5, 10, 15, 20, 25], 'n_jobs': [-1],
                         'n_neighbors': [5, 10, 15],
                         'weights': ['uniform', 'distance']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='balanced_accuracy', verbose=0)

In [30]:
## SGD ##
best_SGD = GridSearchCV(SGDClassifier(),SGD_parameters,cv=10,scoring='balanced_accuracy',return_train_score=False,n_jobs=-1)
best_SGD.fit(X_train,y_train)

  y = column_or_1d(y, warn=True)


GridSearchCV(cv=10, error_score=nan,
             estimator=SGDClassifier(alpha=0.0001, average=False,
                                     class_weight=None, early_stopping=False,
                                     epsilon=0.1, eta0=0.0, fit_intercept=True,
                                     l1_ratio=0.15, learning_rate='optimal',
                                     loss='hinge', max_iter=1000,
                                     n_iter_no_change=5, n_jobs=None,
                                     penalty='l2', power_t=0.5,
                                     random_state=None, shuffle=True, tol=0.001,
                                     validation_fraction=0.1...
                                     warm_start=False),
             iid='deprecated', n_jobs=-1,
             param_grid={'class_weight': [None, 'balanced'],
                         'early_stopping': [True],
                         'fit_intercept': [True, False],
                         'learning_rate': ['optima

In [31]:
## Random Forest ##
best_RF = GridSearchCV(RandomForestClassifier(),RF_parameters,cv=10,scoring='balanced_accuracy',return_train_score=False,n_jobs=-1)
best_RF.fit(X_train,y_train)

  self.best_estimator_.fit(X, y, **fit_params)


GridSearchCV(cv=10, error_score=nan,
             estimator=RandomForestClassifier(bootstrap=True, ccp_alpha=0.0,
                                              class_weight=None,
                                              criterion='gini', max_depth=None,
                                              max_features='auto',
                                              max_leaf_nodes=None,
                                              max_samples=None,
                                              min_impurity_decrease=0.0,
                                              min_impurity_split=None,
                                              min_samples_leaf=1,
                                              min_samples_split=2,
                                              min_weight_fraction_leaf=0.0,
                                              n_estimators=100, n_jobs=None,
                                              oob_score=False,
                                              rand

Accuracy Score on Testing Data

In [0]:
## Making Predicitions using the X_test

DT_predictions = best_DT.predict(X_test)
KNN_predictions = best_KNN.predict(X_test)
SGD_predictions = best_SGD.predict(X_test)
RF_predictions = best_RF.predict(X_test)

In [33]:
## Accuarcy Score ##
## Balanced Accuracy Score ##

print("Descision Tree Accuracy Score: "+str(accuracy_score(y_test,DT_predictions,normalize=True)*100)+"%")
print("KNN Accuracy Score: "+str(accuracy_score(y_test,KNN_predictions,normalize=True)*100)+"%")
print("SGDClassifier Accuracy Score: "+str(accuracy_score(y_test,SGD_predictions,normalize=True)*100)+"%")
print("RF Accuracy Score: "+str(accuracy_score(y_test,RF_predictions,normalize=True)*100)+"%")

Descision Tree Accuracy Score: 81.52173913043478%
KNN Accuracy Score: 79.34782608695652%
SGDClassifier Accuracy Score: 77.71739130434783%
RF Accuracy Score: 85.86956521739131%


In [34]:
## Balanced Accuracy Score ##

print("Descision Tree Accuracy Score: "+str(balanced_accuracy_score(y_test,DT_predictions)))
print("KNN Accuracy Score: "+str(balanced_accuracy_score(y_test,KNN_predictions)))
print("SGDClassifier Accuracy Score: "+str(balanced_accuracy_score(y_test,SGD_predictions)))
print("RF Accuracy Score: "+str(balanced_accuracy_score(y_test,RF_predictions)))

Descision Tree Accuracy Score: 0.8130081300813008
KNN Accuracy Score: 0.7910090865614539
SGDClassifier Accuracy Score: 0.7846724055475849
RF Accuracy Score: 0.8558106169296986


In [35]:
## Classification Report Generation ##
print("Descision Tree Report: \n"+str(classification_report(y_test,DT_predictions)))
print("KNN Report: \n"+str(classification_report(y_test,KNN_predictions)))
print("SGDClassifier Report: \n"+str(classification_report(y_test,SGD_predictions)))
print("RF Report: \n"+str(classification_report(y_test,RF_predictions)))

Descision Tree Report: 
              precision    recall  f1-score   support

         0.0       0.79      0.79      0.79        82
         1.0       0.83      0.83      0.83       102

    accuracy                           0.82       184
   macro avg       0.81      0.81      0.81       184
weighted avg       0.82      0.82      0.82       184

KNN Report: 
              precision    recall  f1-score   support

         0.0       0.77      0.77      0.77        82
         1.0       0.81      0.81      0.81       102

    accuracy                           0.79       184
   macro avg       0.79      0.79      0.79       184
weighted avg       0.79      0.79      0.79       184

SGDClassifier Report: 
              precision    recall  f1-score   support

         0.0       0.71      0.85      0.77        82
         1.0       0.86      0.72      0.78       102

    accuracy                           0.78       184
   macro avg       0.78      0.78      0.78       184
weighted avg  

In [36]:
## Using the Random Forest Model, get the importance of each feature

importantFeatures = pd.DataFrame(best_RF.best_estimator_.feature_importances_)
importantFeatures.insert(loc=0,column='Feature Name',value=["age","sex","cp","trestbps","chol","fbs","restecg","thalach","exang","oldpeak","slope"])
importantFeatures.columns = ['Feature Name','Importance Score']
print(importantFeatures)
importantFeatures.sort_values(by=['Importance Score'],ascending=False,inplace=True)
save_DataFrame_as_csv(importantFeatures,"Feature Importances")
print("\n\n")
print(importantFeatures)


   Feature Name  Importance Score
0           age          0.095747
1           sex          0.080947
2            cp          0.127729
3      trestbps          0.118349
4          chol          0.089446
5           fbs          0.112516
6       restecg          0.149443
7       thalach          0.036711
8         exang          0.130660
9       oldpeak          0.027555
10        slope          0.030897



   Feature Name  Importance Score
6       restecg          0.149443
8         exang          0.130660
2            cp          0.127729
3      trestbps          0.118349
5           fbs          0.112516
0           age          0.095747
4          chol          0.089446
1           sex          0.080947
7       thalach          0.036711
10        slope          0.030897
9       oldpeak          0.027555


In [37]:
## Parameter Retrival
DT_params = best_DT.best_estimator_.get_params()
KNN_params = best_KNN.best_estimator_.get_params()
Hinge_params = best_SGD.best_estimator_.get_params()
RF_params = best_RF.best_estimator_.get_params()


# print(type(DT_params))

params = [DT_params,KNN_params,Hinge_params,RF_params]
paramNames = ['DT_params','KNN_params','Hinge_params','RF_params']

for (modelParam,name) in zip(params,paramNames):
  print(name,modelParam)
  paramDF = pd.DataFrame(modelParam,index=[0]).transpose()
  save_DataFrame_as_csv(paramDF,name)


DT_params {'ccp_alpha': 0, 'class_weight': None, 'criterion': 'entropy', 'max_depth': None, 'max_features': None, 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_impurity_split': None, 'min_samples_leaf': 41, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'presort': 'deprecated', 'random_state': None, 'splitter': 'best'}
KNN_params {'algorithm': 'auto', 'leaf_size': 5, 'metric': 'minkowski', 'metric_params': None, 'n_jobs': -1, 'n_neighbors': 10, 'p': 2, 'weights': 'distance'}
Hinge_params {'alpha': 0.0001, 'average': False, 'class_weight': 'balanced', 'early_stopping': True, 'epsilon': 0.1, 'eta0': 0.0, 'fit_intercept': True, 'l1_ratio': 0.15, 'learning_rate': 'optimal', 'loss': 'hinge', 'max_iter': 1000, 'n_iter_no_change': 5, 'n_jobs': None, 'penalty': 'elasticnet', 'power_t': 0.5, 'random_state': None, 'shuffle': False, 'tol': None, 'validation_fraction': 0.1, 'verbose': 0, 'warm_start': False}
RF_params {'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': Non

In [0]:
## Report Generation and Saving


DT_report = classification_report(y_test,DT_predictions,output_dict=True)
KNN_report = classification_report(y_test,KNN_predictions,output_dict=True)
Hinge_report = classification_report(y_test,SGD_predictions,output_dict=True)
RF_report = classification_report(y_test,RF_predictions,output_dict=True)

reports = [DT_report,KNN_report,Hinge_report,RF_report]
reportNames = ['DT_report','KNN_report','Hinge_report','RF_report']

for(report,name) in zip(reports,reportNames):
  reportDF = pd.DataFrame(report).transpose()
  save_DataFrame_as_csv(reportDF,name)

