# Using the `os` library to navigate files

The `os` library is useful for identifying what files you have, so that you can then do things with them (e.g. import or combine them).

In [None]:
#import the library - plus pandas
import os
import pandas as pd

## Finding what directory you're in

You can use the `getcwd()` function to tell you the folder you're in.

In [None]:
#show the current working directory - getcwd() returns the path as a string,
#which the notebook displays because it is the last expression in the cell
os.getcwd()

'/content'

## Changing directory

And if you specify what folder you want to move into with `chdir()` it will move you there.

In [None]:
#change directory - 'sample_data' is a relative path, so this moves into the
#folder of that name inside the current working directory
os.chdir('sample_data')

In [None]:
#show the current working directory again to confirm that chdir() moved us
os.getcwd()

'/content/sample_data'

## Listing files

The `listdir()` function will list everything (files and folders) in the current directory - which is especially useful if you want to import data from some of those files.

In [None]:
#list the names of everything in the current working directory
#(called with no argument, listdir() looks at the directory you are in)
os.listdir()

['anscombe.json',
 'README.md',
 'california_housing_test.csv',
 'mnist_train_small.csv',
 'california_housing_train.csv',
 'mnist_test.csv']

## Making a list of CSV files only

The output of `listdir()` can be looped through to build a list of just the CSV files - printing that list at the end lets you check the results.

In [None]:
#create an empty list for CSV filenames
csvsonly = []

#loop through everything in the current directory
for i in os.listdir():
  #print each name so the final list can be checked against the full listing
  print(i)
  #only keep names that END in '.csv' - testing whether '.csv' appears
  #anywhere in the name would also let through files like 'data.csv.bak'
  if i.endswith('.csv'):
    csvsonly.append(i)

#check the results
print(csvsonly)

anscombe.json
README.md
mnist_test.csv
mnist_train_small.csv
california_housing_train.csv
california_housing_test.csv
['mnist_test.csv', 'mnist_train_small.csv', 'california_housing_train.csv', 'california_housing_test.csv']


## Looping through matching files to create a combined dataframe

And you can use the same approach to identify files matching particular criteria and then import them all into a combined dataframe. Below, for example, we've written some code which identifies data files on housing, and then imports the data from all matching files into a single dataframe.

In [None]:
#take a snapshot of everything in the current directory
filelist = os.listdir()

#keep only the names that contain 'housing', in the order they were listed
housingcsvs = [name for name in filelist if 'housing' in name]

#display the matching filenames
housingcsvs

['california_housing_test.csv', 'california_housing_train.csv']

In [None]:
#create a list to hold one dataframe per CSV
housingframes = []

#loop through the CSVs
for i in housingcsvs:
  print(i)
  #read each file into a temporary dataframe
  housingdf = pd.read_csv(i)
  #store it - the frames are combined once, after the loop
  housingframes.append(housingdf)

#combine all the frames in one go with pd.concat()
#(DataFrame.append() was removed in pandas 2.0, and re-copied the growing
#dataframe on every pass through the loop)
#pd.concat() raises an error on an empty list, so fall back to an empty
#dataframe when no files matched
#each file's original row index is kept, so index values can repeat
allhousingdf = pd.concat(housingframes) if housingframes else pd.DataFrame()

allhousingdf

california_housing_test.csv
california_housing_train.csv


Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value
0,-122.05,37.37,27.0,3885.0,661.0,1537.0,606.0,6.6085,344700.0
1,-118.30,34.26,43.0,1510.0,310.0,809.0,277.0,3.5990,176500.0
2,-117.81,33.78,27.0,3589.0,507.0,1484.0,495.0,5.7934,270500.0
3,-118.36,33.82,28.0,67.0,15.0,49.0,11.0,6.1359,330000.0
4,-119.67,36.33,19.0,1241.0,244.0,850.0,237.0,2.9375,81700.0
...,...,...,...,...,...,...,...,...,...
16995,-124.26,40.58,52.0,2217.0,394.0,907.0,369.0,2.3571,111400.0
16996,-124.27,40.69,36.0,2349.0,528.0,1194.0,465.0,2.5179,79000.0
16997,-124.30,41.84,17.0,2677.0,531.0,1244.0,456.0,3.0313,103600.0
16998,-124.30,41.80,19.0,2672.0,552.0,1298.0,478.0,1.9797,85800.0
