# Converts neiss1999 ... neiss2018 ... excel files to a pckl file.

<br/>
<br/>
File:  convert_neiss_original_data_to_pckl.ipynb<br/>
Author:  Martin Corbett<br/>
Copyright:  (c) 2019 All rights reserved, worldwide.<br/>
<br/>
<br/>
------------------------------------------------------------------------------------------------<br/>
Instructions for use of this application.<br/>
------------------------------------------------------------------------------------------------<br/>
<br/>
A dialog box will appear when the application starts.<br/>
<br/>
If you already have a pckl file, choose that pckl file and the application will show the<br/>
dataframe contents and then exit.<br/>
<br/>
If you do not have a pckl file then select one or more neissXXXX.xlsx files.  The application will create a<br/>
dataframe and concatenate each file that is read to the dataframe existing before the new file is <br/>
read.  When all files have been read the dataframe will be saved to neiss_data.pckl in the directory <br/>
where the xlsx files were located.  Subsequently, the contents of the dataframe that was saved are <br/>
displayed.<br/>
<br/>
<br/>

In [1]:
import numpy as np
import os
import pandas as pd
import pickle
from PyQt5.QtWidgets import QApplication, QWidget, QInputDialog, QLineEdit, QFileDialog
from PyQt5.QtGui import QIcon
from PyQt5 import QtGui
import re
import sys

- If a pckl file is selected, read it into the dataframe.  Otherwise, read all the selected neiss1999 ... neiss2018 files into a dataframe and save that dataframe as neiss_data.pckl

In [None]:

class App(QApplication):
    """Qt application that turns NEISS Excel workbooks into a single pickle.

    The user picks files in a dialog.  A single ``.pckl`` selection is loaded
    and displayed; a selection of Excel files is read, concatenated, saved as
    ``neiss_data.pckl`` next to the first selected file, and then displayed.
    """

    def __init__(self, args):
        """Create the Qt application and immediately run the conversion.

        Args:
            args: Command-line arguments forwarded to ``QApplication``.
        """
        super().__init__(args)

        # The whole workflow is driven from the constructor.
        self.main(args)

    def getFilenamesFromUser(self, caption, dir_name, file_filter):
        """Show a multi-file "open" dialog.

        Args:
            caption: Title of the dialog.
            dir_name: Directory the dialog starts in.
            file_filter: Qt file-filter string (entries separated by ``;;``).

        Returns:
            The value of ``QFileDialog.getOpenFileNames``; ``main`` unpacks it
            as ``(file_names, selected_filter)``.
        """
        return QFileDialog.getOpenFileNames(
            None, caption, directory=dir_name, filter=file_filter)

    def readNeissPcklData(self, pckl_pathname):
        """Load and return the object pickled in ``pckl_pathname``.

        NOTE: unpickling can execute arbitrary code, so only open pickle
        files that come from a trusted source.
        """
        with open(pckl_pathname, "rb") as f:
            return pickle.load(f)

    def readNeissOriginalFiles(self, file_names):
        """Read the selected files into one dataframe.

        Args:
            file_names: Paths chosen by the user.  Either exactly one
                ``.pckl`` file, or any number of files readable by
                ``pandas.read_excel``.

        Returns:
            The unpickled object for a ``.pckl`` selection, otherwise the
            concatenation of all Excel files in selection order (an empty
            dataframe when ``file_names`` is empty).

        Raises:
            Exception: If more than one pckl file is selected, or pckl and
                Excel files are mixed.
        """
        extensions = [os.path.splitext(pathname)[1] for pathname in file_names]

        # Validate the selection up front so that an invalid mix is rejected
        # before any (slow) Excel file has been read.
        if '.pckl' in extensions and 1 != len(file_names):
            if all(ext == '.pckl' for ext in extensions):
                raise Exception('Exception:  support for only a single pckl file is allowed')
            raise Exception('Exception:  intermixing xlsx and pckl files is not allowed')

        if extensions == ['.pckl']:
            pathname = file_names[0]
            print("Processing pckl {}  ... ".format(os.path.basename(pathname)), end="")
            df = self.readNeissPcklData(pathname)
            print("done!")
            return df

        # Collect the per-file frames and concatenate once; concatenating
        # inside the loop would copy the accumulated data on every iteration.
        frames = []
        for pathname in file_names:
            print("Processing neiss {}  ... ".format(os.path.basename(pathname)), end="")
            frames.append(pd.read_excel(pathname))
            print("done!")

        # pd.concat raises on an empty list, so handle "nothing selected".
        if not frames:
            return pd.DataFrame()
        return pd.concat(frames)

    def saveNeissOriginalDataToPcklFile(self, df, pckl_fname):
        """Pickle ``df`` to the file ``pckl_fname``, overwriting it if present."""
        with open(pckl_fname, 'wb') as f:
            pickle.dump(df, f)

    def main(self, args):
        """Ask for input files, build or load the dataframe, and display it.

        Args:
            args: Command-line arguments (currently unused).
        """
        (file_names, _) = self.getFilenamesFromUser(
            caption='Neiss file selector',
            dir_name=os.getcwd(),
            file_filter="Neiss excel files (*.xlsx);;Pickle files (*.pckl)")

        # A cancelled dialog yields no file names; there is nothing to do.
        if not file_names:
            print("No files selected.")
            return

        df = self.readNeissOriginalFiles(file_names)

        # Only data that was read from Excel files is written out.  A selected
        # pckl file is just displayed, so it is never re-saved or copied.
        if os.path.splitext(file_names[0])[1] != '.pckl':
            # The selected path is joined with '/' as in the original code.
            pckl_fname = os.path.dirname(file_names[0]) + '/neiss_data.pckl'
            self.saveNeissOriginalDataToPcklFile(df, pckl_fname)
            print("Saved", pckl_fname)

        # display() is supplied by the IPython/Jupyter runtime.
        display(df)

if __name__ == '__main__':
    # App.__init__ runs the complete conversion workflow.
    app = App(sys.argv)
    # A module-level `return` is a SyntaxError, so the event loop is simply
    # started here.
    # NOTE(review): no window is left open once App.__init__ returns, so this
    # event loop may never terminate on its own - confirm exec() is needed.
    app.exec()


Processing pckl neiss_data.pckl  ... done!


Unnamed: 0,CPSC_Case_Number,Treatment_Date,Age,Sex,Race,Other_Race,Body_Part,Diagnosis,Other_Diagnosis,Disposition,Location,Fire_Involvement,Product_1,Product_2,Narrative_1,Narrative_2,Stratum,PSU,Weight
0,100001,1999-12-24,41,2,1.0,0,31,71,,1,0,0,3299,0,41 YR FEMALE FELL WHILE WALKING. DX: SEVERE MU...,/RIGHT BUTTOCKS & BACK.,S,71,68.1086
1,100002,1999-12-27,80,1,2.0,0,31,57,,1,0,0,611,0,80 YR MALE FELL IN BATHROOM/HE HIT RIGHT RIBS ...,DX: FRACTURED RIGHT RIBS-UPPER TRUNK,S,71,68.1086
2,100003,1999-12-27,4,1,1.0,0,75,53,,1,0,0,1328,0,4 YR MALE HAD METAL LARGE WAGON WHEEL FALL & H...,DX: CONTUSIN ON HEAD/NO LOC.,S,71,68.1086
3,100005,1999-12-28,18,1,0.0,,94,53,,1,0,0,1205,0,CONTUSION EAR - STRUCK IN RIGHT EAR WITH BASKE...,,S,7,68.1086
4,100009,1999-12-28,19,2,0.0,,92,64,,1,0,0,5031,0,SPRAIN THUMB - INJURED THUMB WHEN SNOWBOARDING,,S,7,68.1086
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
339239,10500300,2000-11-12,30,1,0.0,,93,57,,1,1,0,1842,0,PT SLIPPED ON STEP. DX: FX TOE,,L,42,78.7917
339240,10500306,2000-11-14,75,1,0.0,,89,64,,1,4,0,5040,0,PT WAS ON BIKE AND WAS HIT BY CAR. DX: SPRAIN ...,IONS AND CONTUSIONS,L,42,78.7917
339241,10500320,2000-11-10,46,2,0.0,,79,64,,4,1,0,1807,0,"PT WAS SCRUBBING FLOOR DX: SPRAINED THORACIC, ...",,L,42,78.7917
339242,10500329,2000-11-27,218,2,0.0,,76,59,,1,1,0,4056,0,FELL AND HIT HEAD ON CABINET. DX: LAC FOREHEAD,,L,42,78.7917
