# Chapter 1

- Shell commands in IPython / Jupyter notebooks : prefix the command with `!`, e.g. `!ls` (this does not work in a plain Python script)
- dataframe to numpy array : `np.array(df)` or `df.to_numpy()`

### Open txt files

```
import numpy as np

# Way 1 : data in raw text
# The `with` block closes the file automatically when it ends.
with open('filename.txt', 'r') as file:
    # readline() returns only the next line; use file.read() for the whole file
    print(file.readline())

# Way 2 : data similar to dataframe format
# names=True takes the column names from the first row; dtype=None lets
# NumPy choose a type for each column separately.
data = np.genfromtxt('filename.csv', delimiter=',', names=True, dtype=None)

# Way 3 : not recommended
# skiprows=1 skips the first line, usecols=[0,2] keeps only the 1st and 3rd
# columns, and dtype=str reads every value as a string.
data = np.loadtxt('filename.txt', delimiter= ',', skiprows=1, usecols=[0,2], dtype=str)
```

### Open csv file

```
# Way 1 : Import as dataframe
import pandas as pd
data = pd.read_csv("filename.csv")

# Way 2 : Import as a NumPy structured array (one record per row)
# np.recfromcsv is deprecated since NumPy 2.0; genfromtxt with a comma
# delimiter is the documented replacement. names=True reads the column names
# from the header row and dtype=None infers a type per column.
# Adjust `encoding` if the file is not UTF-8.
import numpy as np
data = np.genfromtxt("filename.csv", delimiter=",", names=True, dtype=None, encoding="utf-8")
```

# Chapter 2

### Linux-like commands in python (`os` module)

```
import os
# Get current working directory (equivalent of `pwd`)
wd = os.getcwd()
# List the names of all entries (files and directories) in the current working directory
# In a script, wrap this in print() -- a bare expression is only echoed in an interactive session
os.listdir(wd)
```

### Open pkl files

- pickle files :
    - store native Python objects, preserving their structure and properties
    - the data is serialized, i.e. the objects are converted into a byte stream

```
# NOTE: unpickling can execute arbitrary code, so only load pickle files
# that come from a trusted source.

# Way 1
import pickle
# 'rb' : pickle data is binary, so the file is opened in read-binary mode
with open('filename.pkl', 'rb') as file:
    data = pickle.load(file)

# Way 2
import pandas as pd
df = pd.read_pickle('filename.pkl')
```

### Open Excel file

```
import pandas as pd

### Way 1
# Load excel file; the `with` block closes the workbook when it ends
with pd.ExcelFile('filename.xlsx') as data:
    # See sheet names
    print(data.sheet_names)
    # Loading sheet (parse() returns a dataframe that stays usable after the file is closed)
    df1 = data.parse('sheetname') # sheet name
    df2 = data.parse(0) # sheet index

### Way 2
# Read a single sheet directly, without keeping the workbook open
df = pd.read_excel('filename.xlsx', sheet_name='sheetname')
```

### Open SAS file

```
# Way 1 : sas7bdat package (separate install)
import pandas as pd
from sas7bdat import SAS7BDAT
# The `with` block closes the SAS file when it ends
with SAS7BDAT('filename.sas7bdat') as file:
    # Convert the file contents to a pandas dataframe
    df_sas = file.to_data_frame()

# Way 2 : pandas only
import pandas as pd
df = pd.read_sas("filename.sas7bdat") 
```

### Open Stata file

```
import pandas as pd
# Read the Stata (.dta) file
data = pd.read_stata('filename.dta')
```

### Open hdf5 file

```
# Way 1
import h5py
# Open explicitly read-only; the `with` block closes the file when it ends
with h5py.File('filename.hdf5', 'r') as data:
    print(type(data))

    # Iterate the structure
    for key in data.keys():
        print(key)

    # Iterate further into the structure
    # (assumes 'some_key' is a group, i.e. it has keys of its own)
    for key in data['some_key'].keys():
        print(key)

# Way 2
import h5py
import numpy as np
import pandas as pd
# np.array() copies the dataset into memory, so the dataframe stays valid
# after the file is closed
with h5py.File('filename.hdf5', 'r') as file:
    df = pd.DataFrame(np.array(file['variable_1']))
```

### Open matlab file

```
# Way 1 : SciPy
import scipy.io
mat = scipy.io.loadmat('filename.mat')
# Check what kind of object was returned
print(type(mat))

# Way 2 : mat4py package (separate install)
from mat4py import loadmat
data = loadmat('datafile.mat')
```

# Chapter 3

```
### Way 1
import pandas as pd
from sqlalchemy import create_engine, inspect, text
# Connect to database
engine = create_engine('sqlite:///dbname.sqlite')
# See tables in the database
# (Engine.table_names() was removed in SQLAlchemy 2.0; the inspector works in old and new versions)
table_names = inspect(engine).get_table_names()
print(table_names)
# Open a connection to pass query; the `with` block closes it even if the query fails
with engine.connect() as con:
    # Execute query (SQLAlchemy 2.0 requires raw SQL strings to be wrapped in text())
    rs = con.execute(text("SELECT * FROM Orders"))
    # Store results of all rows
    results = rs.fetchall()
    # Column names
    column_names = list(rs.keys())
# Print the result as list of tuples
print(list(results))
# Store the result in a dataframe
# (passing columns= here also works when the query returns no rows)
df = pd.DataFrame(results, columns=column_names)

### Way 2
from sqlalchemy import create_engine
import pandas as pd
engine = create_engine('sqlite:///dbname.sqlite') # or path
# pandas opens and closes the connection itself and returns a dataframe
df = pd.read_sql_query("SELECT * FROM tableName", engine)
```