## Working with paths

In [8]:
import os

# Resolve 'atin.txt' against the working directory into an absolute path.
# Note: in .py files the path of the current file is available as __file__.
current_file = os.path.realpath('atin.txt')

# os.path.split returns (directory, filename); keep only the directory part.
# Note: in .py files the dir of the current file is os.path.dirname(__file__).
current_dir = os.path.split(current_file)[0]

# The 'data' folder is expected to sit next to the current file.
data_dir = os.path.join(current_dir, 'data')

# Report all three paths once they have been computed.
print('current file: {}'.format(current_file))
print('current directory: {}'.format(current_dir))
print('data directory: {}'.format(data_dir))

current file: /home/u1/Python-Scala-Spark-Training/Exercises/atin.txt
current directory: /home/u1/Python-Scala-Spark-Training/Exercises
data directory: /home/u1/Python-Scala-Spark-Training/Exercises/data


### Checking if path exists

In [5]:
# Probe data_dir three ways (each predicate returns a bool) and report the
# results with a single print call, one result per line.
print('\n'.join((
    'exists: {}'.format(os.path.exists(data_dir)),
    'is file: {}'.format(os.path.isfile(data_dir)),
    'is directory: {}'.format(os.path.isdir(data_dir)),
)))

exists: True
is file: False
is directory: True


## Reading files

In [9]:
file_path = os.path.join(data_dir, 'atin.txt')

# The with statement closes the file automatically when the block ends.
with open(file_path, 'r') as text_file:
    # Iterating over a file object yields one line at a time, each still
    # carrying its trailing newline; print() adds another one, which is why
    # blank lines show up in the output.
    for line in text_file:
        print(line)

Atin Gupta



In [10]:
file_path = os.path.join(data_dir, 'atin.txt')

# THIS IS NOT THE PREFERRED WAY. Use the with statement instead (see the example in the previous cell).
simple_file = open(file_path, 'r')
for line in simple_file:
    print(line.strip())  # strip() drops surrounding whitespace, including the line's trailing newline
simple_file.close()  # This has to be called explicitly; it is not reached if the loop above raises

Atin Gupta


## Writing files

In [11]:
new_file_path = os.path.join(data_dir, 'new_file.txt')

# Mode 'w' creates the file, or truncates it if it already exists.
# The with statement closes (and thereby flushes) the file when the block ends.
with open(new_file_path, 'w') as output_file:
    output_file.write('This is my first file that I wrote with Python.')

Now go and check that there is a new_file.txt in the data directory. After that we can delete the file by:

In [12]:
# Only attempt the deletion when the file is actually present, so that
# re-running this cell does not raise.
if os.path.exists(new_file_path):
    os.unlink(new_file_path)  # os.unlink is the same operation as os.remove

## Python Pickling

 - Python pickle module is used for serializing and de-serializing a Python object structure. 
 - Most Python objects (lists, dicts, class instances, etc.) can be pickled so that they can be saved on disk; a few, such as open files and lambdas, cannot.
 - What pickle does is that it “serializes” the object first before writing it to file.
 - Pickling is a way to convert a Python object (list, dict, etc.) into a byte stream.
 - The idea is that this byte stream contains all the information necessary to reconstruct the object in another Python script.

In [20]:
# Python 3 example: storing and loading data with the pickle module.
# The module translates an in-memory Python object into a serialized
# byte stream (a string of bytes) that can be written to any
# file-like object.

import pickle 

def storeData(db, filename='examplePickle'):
    """Serialize ``db`` to a file using pickle.

    Args:
        db: The Python object to store; it must be picklable.
        filename: Path of the file to write. Defaults to 'examplePickle',
            relative to the current working directory. An existing file
            is overwritten.

    Raises:
        OSError: If the file cannot be opened for writing.
        pickle.PicklingError: If ``db`` contains an object that cannot be pickled.
    """
    # Pickle produces bytes, so it is important to use binary mode.
    # The with statement closes the file even if pickle.dump raises.
    with open(filename, 'wb') as dbfile:
        # source, destination
        pickle.dump(db, dbfile)

def loadData(filename='examplePickle'):
    """Load and return the object stored in a pickle file.

    Args:
        filename: Path of the pickle file to read. Defaults to
            'examplePickle', relative to the current working directory.

    Returns:
        The Python object reconstructed from the file.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        pickle.UnpicklingError: If the file content is not a valid pickle.
    """
    # WARNING: unpickling can execute arbitrary code. Only load pickle
    # files that come from a source you trust.
    # For reading, binary mode is important as well. The with statement
    # closes the file even if pickle.load raises.
    with open(filename, 'rb') as dbfile:
        return pickle.load(dbfile)

In [21]:
# initializing data to be stored in db 
# Two sample records that will be stored in the database.
Omkar = {
    'key': 'Omkar',
    'name': 'Omkar Pathak',
    'age': 21,
    'pay': 40000,
}
Jagdish = {
    'key': 'Jagdish',
    'name': 'Jagdish Pathak',
    'age': 50,
    'pay': 50000,
}

# The "database" is a plain dict that maps each record's key to the record.
db = {'Omkar': Omkar, 'Jagdish': Jagdish}

print(db)


{'Omkar': {'key': 'Omkar', 'name': 'Omkar Pathak', 'age': 21, 'pay': 40000}, 'Jagdish': {'key': 'Jagdish', 'name': 'Jagdish Pathak', 'age': 50, 'pay': 50000}}


In [22]:
# Pickle the db dictionary to disk with the storeData helper defined above.
storeData(db)

In [23]:
# Read the pickled object back from disk into a new variable.
atindb = loadData()


Omkar => {'key': 'Omkar', 'name': 'Omkar Pathak', 'age': 21, 'pay': 40000}
Jagdish => {'key': 'Jagdish', 'name': 'Jagdish Pathak', 'age': 50, 'pay': 50000}


In [None]:
# Show the object that was reconstructed by loadData().
print(atindb)

### Advantages
 - Helps in saving complicated data.
 - Quite easy to use: saving or loading an object takes only a few lines of code.
 - Saved data is in a binary format that is not human-readable. Note that this is obscurity, not real data security: pickle files are not encrypted.

### Disadvantages
 - Non-Python programs may not be able to reconstruct pickled Python objects.
 - Security risks in unpickling data from malicious sources.