*Quick reminder:* To run the code in a cell, either click on the "play" icon above, or press Shift+Enter. Cells can be run as many times as needed.

In [None]:
import os
import pandas as pd
from sas7bdat import SAS7BDAT

# Convert SAS files to TSV files  
<i><font size=2 color="grey">Version 1.0, last modified September 2017<br>
Python 3.6<br>
Pandas 0.19.2<br>
sas7bdat 2.0.7<br>
</font></i>
  
  
This notebook contains code to convert SAS files to tab-separated files.

**<u>Expected input:</u>**  
  
Path to directory containing SAS files. <font color="red"><strong>Caution: </strong></font>The following script will try to convert every \*.sas7bdat file in the directory.  
  
**<u>Output:</u>**  

Directory containing tab-separated versions of the SAS files. It is created inside the directory containing the SAS files if it doesn't already exist.    
  
  
**<u>Parameters:</u>**  
<font color="DarkRed"><strong>Please</strong></font> change the following parameters by commenting out or editing accordingly.

In [None]:
## Path to the directory that holds the SAS (*.sas7bdat) files to convert.
## Replace the placeholder below with the real location before running:
path_to_sass = "placeholder/path/to/files"

## Name of the output directory. It is joined onto the path above, so the
## converted files end up in a sub-directory of the SAS files directory:
output_dir = "sas_to_text"

The following lines of code parse the content of the input directory and try to convert each file to a tab-separated text file.

In [None]:
## Paths of the text files that were written successfully:
new_files = []

## Fail early when the input directory is missing (e.g. the placeholder path was
## not edited); otherwise os.makedirs below would silently create it:
if not os.path.isdir(path_to_sass):
    raise FileNotFoundError("Input directory not found: %s" % path_to_sass)

## Create output directory if it doesn't exist:
txt_files_path = os.path.join(path_to_sass, output_dir)
os.makedirs(txt_files_path, exist_ok=True)

## Loop through the first level of the input directory only (sub-directories,
## including the output directory, are not visited). Sorting keeps the
## processing order and the final report stable between runs:
for ind_file in sorted(os.listdir(path_to_sass)):
    full_filepath = os.path.join(path_to_sass, ind_file)

    ## Only proceed for regular files with a .sas7bdat extension. splitext needs
    ## a real extension, so a file named just "sas7bdat" is skipped, and lower()
    ## also accepts upper-case extensions such as ".SAS7BDAT":
    extension = os.path.splitext(ind_file)[1].lower()
    if extension != ".sas7bdat" or not os.path.isfile(full_filepath):
        continue

    ## The output keeps the original file name and appends ".txt":
    text_filename = ".".join([ind_file, "txt"])
    full_txtpath = os.path.join(txt_files_path, text_filename)

    ## Convert sas file:
    try:
        with SAS7BDAT(full_filepath) as f:
            df = f.to_data_frame()
        ## With the default to_csv settings the DataFrame index is written as
        ## the first column of the output file:
        df.to_csv(full_txtpath, sep="\t")
    except Exception as err:
        ## Report the reason and carry on with the remaining files. Unlike a
        ## bare "except:", this lets KeyboardInterrupt stop the run:
        print("%s could not be converted: %s\n" % (ind_file, err))
    else:
        new_files.append(full_txtpath)

print("%s files were created in %s:\n" % (len(new_files), txt_files_path))
print("\n".join(new_files))