In [None]:
"""
Parameter Details
filename - the path to your file or, if the file is in the working directory, the filename of your file
access_mode - a string value that determines how the file is opened
buffering - an integer value used for optional line buffering
"""

In [None]:
"""
File modes - 
There are different modes you can open a file with, specified by the mode parameter. These include:

'r' - reading mode. The default. It allows you only to read the file, not to modify it. When using this mode the
file must exist.
'w' - writing mode. It will create a new file if it does not exist, otherwise will erase the file and allow you to
write to it.
'a' - append mode. It will write data to the end of the file. It does not erase the file, and the file must exist for
this mode.
'rb' - reading mode in binary. This is similar to r except that the reading is forced in binary mode. This is
also a default choice.
'r+' - reading mode plus writing mode at the same time. This allows you to read and write into files at the
same time without having to use r and w.
'rb+' - reading and writing mode in binary. The same as r+ except the data is in binary
'wb' - writing mode in binary. The same as w except the data is in binary.
'w+' - writing and reading mode. The exact same as r+ but if the file does not exist, a new one is made.
Otherwise, the file is overwritten.
'wb+' - writing and reading mode in binary mode. The same as w+ but the data is in binary.
'ab' - appending in binary mode. Similar to a except that the data is in binary.
'a+' - appending and reading mode. Similar to w+ as it will create a new file if the file does not exist.
Otherwise, the file pointer is at the end of the file if it exists.
'ab+' - appending and reading mode in binary. The same as a+ except that the data is in binary.


with open(filename, 'r') as f:
    f.read()
with open(filename, 'w') as f:
    f.write(filedata)
with open(filename, 'a') as f:
    f.write('\\n' + newdata)

Python 3 added a new mode for exclusive creation so that you will not accidentally truncate or overwrite and
existing file.
'x' - open for exclusive creation, will raise FileExistsError if the file already exists
'xb' - open for exclusive creation writing mode in binary. The same as x except the data is in binary.
'x+' - reading and writing mode. Similar to w+ as it will create a new file if the file does not exist. Otherwise,
will raise FileExistsError.
'xb+' - writing and reading mode. The exact same as x+ but the data is binary
"""


In [10]:
#Simple file program, creating file

with open('myfile.txt','w') as fp:
    fp.write("Hello \ngood \nmorning")
    fp.close()

In [11]:
#reading file

with open('myfile.txt', 'r') as fp:
    for line in fp:
        print(line)

Hello 

good 

morning


In [13]:
#reading line by line
with open('myfile.txt', 'r') as fp:
    print(fp.readline())
    print(fp.readline())
    print(fp.readline())

Hello 

good 

morning


In [14]:
#reading all lines
with open('myfile.txt', 'r') as fp:
    print(fp.readlines())

['Hello \n', 'good \n', 'morning']


In [39]:
#Directory listing
#os.scandir() returns an iterator as opposed to a list when called
import os
mydir = os.scandir(r'C:\Users\SP\Desktop\WorldofDS\JourneyintoDataScience')
print(mydir)

<nt.ScandirIterator object at 0x0000023364AEC220>


In [38]:
print(dir(os))

['DirEntry', 'F_OK', 'MutableMapping', 'O_APPEND', 'O_BINARY', 'O_CREAT', 'O_EXCL', 'O_NOINHERIT', 'O_RANDOM', 'O_RDONLY', 'O_RDWR', 'O_SEQUENTIAL', 'O_SHORT_LIVED', 'O_TEMPORARY', 'O_TEXT', 'O_TRUNC', 'O_WRONLY', 'P_DETACH', 'P_NOWAIT', 'P_NOWAITO', 'P_OVERLAY', 'P_WAIT', 'PathLike', 'R_OK', 'SEEK_CUR', 'SEEK_END', 'SEEK_SET', 'TMP_MAX', 'W_OK', 'X_OK', '_Environ', '__all__', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__spec__', '_execvpe', '_exists', '_exit', '_fspath', '_get_exports_list', '_putenv', '_unsetenv', '_wrap_close', 'abc', 'abort', 'access', 'altsep', 'chdir', 'chmod', 'close', 'closerange', 'cpu_count', 'curdir', 'defpath', 'device_encoding', 'devnull', 'dup', 'dup2', 'environ', 'error', 'execl', 'execle', 'execlp', 'execlpe', 'execv', 'execve', 'execvp', 'execvpe', 'extsep', 'fdopen', 'fsdecode', 'fsencode', 'fspath', 'fstat', 'fsync', 'ftruncate', 'get_exec_path', 'get_handle_inheritable', 'get_inheritable', 'get_ter

In [37]:
#The ScandirIterator points to all the entries in the current directory.
#You can loop over the contents of the iterator and print out the filenames

import os

with os.scandir(r'C:\Users\SP\Desktop\WorldofDS\JourneyintoDataScience') as entries:
    for entry in entries:
        print(entry.name)

.git
.ipynb_checkpoints
001_Basics.ipynb
002_Datetime_basics.ipynb
003_datetime_advance.ipynb
004_Sets_ops and simple math_ops.ipynb
005_Variable scope and binding.ipynb
006_Conditionals_Loops.ipynb
007_Arrays.ipynb
008_dictionary.ipynb
009_Lists.ipynb
010_list_comprehensions.ipynb
011_Tuples.ipynb
012_files_and_folders.ipynb
myfile.txt
README.md


In [40]:
#Another method

from pathlib import Path

entries = Path(r'C:\Users\SP\Desktop\WorldofDS\JourneyintoDataScience')
for entry in entries.iterdir():
    print(entry.name)

.git
.ipynb_checkpoints
001_Basics.ipynb
002_Datetime_basics.ipynb
003_datetime_advance.ipynb
004_Sets_ops and simple math_ops.ipynb
005_Variable scope and binding.ipynb
006_Conditionals_Loops.ipynb
007_Arrays.ipynb
008_dictionary.ipynb
009_Lists.ipynb
010_list_comprehensions.ipynb
011_Tuples.ipynb
012_files_and_folders.ipynb
myfile.txt
README.md


In [None]:
"""
Using pathlib.Path() or os.scandir() instead of os.listdir() is the preferred way of getting a directory listing, 
especially when you’re working with code that needs the file type and file attribute information.
pathlib.Path() offers much of the file and path handling functionality found in os and shutil, 
and it’s methods are more efficient than some found in these modules.

Here are the directory-listing functions again:
Function 	Description
os.listdir() 	Returns a list of all files and folders in a directory
os.scandir() 	Returns an iterator of all the objects in a directory including file attribute information
pathlib.Path.iterdir() 	Returns an iterator of all the objects in a directory including file attribute information

These functions return a list of everything in the directory, including subdirectories.
"""

In [46]:
#listing all files in a directory

import os

#list all files in a directory using os.scandir()

basepath = r'C:\Users\SP\Desktop\WorldofDS\JourneyintoDataScience'
with os.scandir(basepath) as entries:
    for entry in entries:
        if entry.is_file():
            print(entry.name)


001_Basics.ipynb
002_Datetime_basics.ipynb
003_datetime_advance.ipynb
004_Sets_ops and simple math_ops.ipynb
005_Variable scope and binding.ipynb
006_Conditionals_Loops.ipynb
007_Arrays.ipynb
008_dictionary.ipynb
009_Lists.ipynb
010_list_comprehensions.ipynb
011_Tuples.ipynb
012_files_and_folders.ipynb
myfile.txt
README.md


In [49]:
#listing all sub directories in a directory

import os

#list all files subdirectories using os.scandir()

basepath = r'C:\Users\SP\Desktop\WorldofDS'

with os.scandir(basepath) as entries:
    for entry in entries:
        if entry.is_dir():
            print(entry.name)

JourneyintoDataScience
weekday2019_09_05


In [56]:
#getting file attributes
"""
Python makes retrieving file attributes such as file size and modified times easy. 
This is done through os.stat(), os.scandir(), or pathlib.Path().

os.scandir() and pathlib.Path() retrieve a directory listing with file attributes combined. 
This can be potentially more efficient than using os.listdir() to list files and 
then getting file attribute information for each file.
"""

import os
from datetime import datetime

basepath = r'C:\Users\SP\Desktop\WorldofDS\JourneyintoDataScience'
with os.scandir(basepath) as dir_contents:
    for entry in dir_contents:
        if entry.is_file():
            info = entry.stat()
            print(entry.name,'\t',info.st_mtime,'\t',datetime.utcfromtimestamp(info.st_mtime).strftime('%d %b %Y'))
        

001_Basics.ipynb 	 1570243862.1214387 	 05 Oct 2019
002_Datetime_basics.ipynb 	 1570250108.0989268 	 05 Oct 2019
003_datetime_advance.ipynb 	 1570278138.3069198 	 05 Oct 2019
004_Sets_ops and simple math_ops.ipynb 	 1570281319.801463 	 05 Oct 2019
005_Variable scope and binding.ipynb 	 1570284059.0920303 	 05 Oct 2019
006_Conditionals_Loops.ipynb 	 1570291746.9190738 	 05 Oct 2019
007_Arrays.ipynb 	 1570311710.5029402 	 05 Oct 2019
008_dictionary.ipynb 	 1570316927.1665032 	 05 Oct 2019
009_Lists.ipynb 	 1570338453.6358786 	 06 Oct 2019
010_list_comprehensions.ipynb 	 1570341625.2101002 	 06 Oct 2019
011_Tuples.ipynb 	 1570415302.0550694 	 07 Oct 2019
012_files_and_folders.ipynb 	 1570421197.5097034 	 07 Oct 2019
myfile.txt 	 1570417018.1169958 	 07 Oct 2019
README.md 	 1570210731.293132 	 04 Oct 2019


In [63]:
#making directories

"""
Function 	Description
os.mkdir() 	Creates a single subdirectory
pathlib.Path.mkdir() 	Creates single or multiple directories
os.makedirs() 	Creates multiple directories, including intermediate directories
"""

import os
os.mkdir("test1_dir")
os.makedirs("test2_dir")

In [64]:
#reading created dirs
basepath = r'C:\Users\SP\Desktop\WorldofDS\JourneyintoDataScience'
with os.scandir(basepath) as entries:
    for entry in entries:
        if entry.is_dir():
            print(entry.name)

.git
.ipynb_checkpoints
test1_dir
test2_dir
test_dir


In [90]:
#creating nested directory
import os
os.makedirs("test2_dir/2018/10/07")

In [96]:
#printing nexted directory
"""
os.walk() is used to generate filename in a directory tree by walking the tree either top-down or bottom-up

os.walk() returns three values on each iteration of the loop:
    The name of the current folder
    A list of folders in the current folder
    A list of files in the current folder

On each iteration, it prints out the names of the subdirectories and files it finds
"""

basepath = r'C:\Users\SP\Desktop\WorldofDS\JourneyintoDataScience'
for dirpath, dirnames, files in os.walk(basepath):
    print(f'Found directory: {dirpath}')
    for file_name in files:
        print(file_name)

Found directory: C:\Users\SP\Desktop\WorldofDS\JourneyintoDataScience
001_Basics.ipynb
002_Datetime_basics.ipynb
003_datetime_advance.ipynb
004_Sets_ops and simple math_ops.ipynb
005_Variable scope and binding.ipynb
006_Conditionals_Loops.ipynb
007_Arrays.ipynb
008_dictionary.ipynb
009_Lists.ipynb
010_list_comprehensions.ipynb
011_Tuples.ipynb
012_files_and_folders.ipynb
myfile.txt
README.md
Found directory: C:\Users\SP\Desktop\WorldofDS\JourneyintoDataScience\.git
.COMMIT_EDITMSG.swp
COMMIT_EDITMSG
config
description
HEAD
index
packed-refs
Found directory: C:\Users\SP\Desktop\WorldofDS\JourneyintoDataScience\.git\hooks
applypatch-msg.sample
commit-msg.sample
fsmonitor-watchman.sample
post-update.sample
pre-applypatch.sample
pre-commit.sample
pre-push.sample
pre-rebase.sample
pre-receive.sample
prepare-commit-msg.sample
update.sample
Found directory: C:\Users\SP\Desktop\WorldofDS\JourneyintoDataScience\.git\info
exclude
Found directory: C:\Users\SP\Desktop\WorldofDS\JourneyintoDataScie

In [102]:
#To traverse the directory tree in a bottom-up manner, pass in a topdown=False keyword argument to os.walk():

for dirpath, dirnames, files in os.walk(basepath, topdown = False):
    print(f'found dir: {dirpath}')
    for file_name in files:
        print(file_name)

found dir: C:\Users\SP\Desktop\WorldofDS\JourneyintoDataScience\.git\hooks
applypatch-msg.sample
commit-msg.sample
fsmonitor-watchman.sample
post-update.sample
pre-applypatch.sample
pre-commit.sample
pre-push.sample
pre-rebase.sample
pre-receive.sample
prepare-commit-msg.sample
update.sample
found dir: C:\Users\SP\Desktop\WorldofDS\JourneyintoDataScience\.git\info
exclude
found dir: C:\Users\SP\Desktop\WorldofDS\JourneyintoDataScience\.git\logs\refs\heads
master
found dir: C:\Users\SP\Desktop\WorldofDS\JourneyintoDataScience\.git\logs\refs\remotes\origin
HEAD
master
found dir: C:\Users\SP\Desktop\WorldofDS\JourneyintoDataScience\.git\logs\refs\remotes
found dir: C:\Users\SP\Desktop\WorldofDS\JourneyintoDataScience\.git\logs\refs
found dir: C:\Users\SP\Desktop\WorldofDS\JourneyintoDataScience\.git\logs
HEAD
found dir: C:\Users\SP\Desktop\WorldofDS\JourneyintoDataScience\.git\objects\03
4f5e4db4479cb1fc0e1de975bde268df53ef49
found dir: C:\Users\SP\Desktop\WorldofDS\JourneyintoDataScience

In [105]:
help(os.walk)

Help on function walk in module os:

walk(top, topdown=True, onerror=None, followlinks=False)
    Directory tree generator.
    
    For each directory in the directory tree rooted at top (including top
    itself, but excluding '.' and '..'), yields a 3-tuple
    
        dirpath, dirnames, filenames
    
    dirpath is a string, the path to the directory.  dirnames is a list of
    the names of the subdirectories in dirpath (excluding '.' and '..').
    filenames is a list of the names of the non-directory files in dirpath.
    Note that the names in the lists are just names, with no path components.
    To get a full path (which begins with top) to a file or directory in
    dirpath, do os.path.join(dirpath, name).
    
    If optional arg 'topdown' is true or not specified, the triple for a
    directory is generated before the triples for any of its subdirectories
    (directories are generated top down).  If topdown is false, the triple
    for a directory is generated after the 

In [109]:
#Making temporary Files and Directories

from tempfile import TemporaryFile

#Create a temp file and write some date to it
fp = TemporaryFile('w+t')
fp.write("Hello MilkyWay")

#Go back to beginning and read data from file
fp.seek(0)
data = fp.readlines()
print(data)
fp.close()

['Hello MilkyWay']


In [114]:
#Deleting files and Driectories

"""
To delete a single file, use pathlib.Path.unlink(), os.remove(). or os.unlink().

os.remove() and os.unlink() are semantically identical. 
To delete a file using os.remove(), do the following:
"""

import os

os.listdir(r'C:\Users\SP\Desktop\WorldofDS\JourneyintoDataScience')
print("before removal")
basepath = r'C:\Users\SP\Desktop\WorldofDS\JourneyintoDataScience\myfile.txt'
os.remove('myfile.txt')


before removal


FileNotFoundError: [WinError 2] The system cannot find the file specified: 'myfile.txt'

In [117]:
basepath = r'C:\Users\SP\Desktop\WorldofDS\JourneyintoDataScience'
with os.scandir(basepath) as entries:
    for entry in entries:
        if entry.is_file():
            print(entry.name)

001_Basics.ipynb
002_Datetime_basics.ipynb
003_datetime_advance.ipynb
004_Sets_ops and simple math_ops.ipynb
005_Variable scope and binding.ipynb
006_Conditionals_Loops.ipynb
007_Arrays.ipynb
008_dictionary.ipynb
009_Lists.ipynb
010_list_comprehensions.ipynb
011_Tuples.ipynb
012_files_and_folders.ipynb
README.md


In [119]:
#removing directory
"""
The standard library offers the following functions for deleting directories:

    os.rmdir()
    pathlib.Path.rmdir()
    shutil.rmtree()

To delete a single directory or folder, use os.rmdir() or pathlib.rmdir(). These two functions only work if the directory you’re trying to delete is empty. 
If the directory isn’t empty, an OSError is raised.
"""
basepath = r'C:\Users\SP\Desktop\WorldofDS\JourneyintoDataScience'
with os.scandir(basepath) as entries:
    for entry in entries:
        if entry.is_dir():
            print(entry.name)

.git
.ipynb_checkpoints
test1_dir
test2_dir
test_dir


In [124]:
os.rmdir('test_dir')

In [125]:
#after deletion
basepath = r'C:\Users\SP\Desktop\WorldofDS\JourneyintoDataScience'
with os.scandir(basepath) as entries:
    for entry in entries:
        if entry.is_dir():
            print(entry.name)

.git
.ipynb_checkpoints
test2_dir


In [126]:
#deleting entire directory trees
#To delete non-empty directories and entire directory trees, Python offers shutil.rmtree():

import shutil

basepath = r'C:\Users\SP\Desktop\WorldofDS\JourneyintoDataScience\test2_dir'

try:
    shutil.rmtree(basepath)
except OSError as e:
    print(f'Error: {trash_dir} : {e.strerror}')


In [130]:
basepath = r'C:\Users\SP\Desktop\WorldofDS\JourneyintoDataScience'
with os.scandir(basepath) as entries:
    for entry in entries:
        if entry.is_dir():
            print(entry.name)

.git
.ipynb_checkpoints


In [131]:
basepath = r'C:\Users\SP\Desktop\WorldofDS\JourneyintoDataScience'
with os.scandir(basepath) as entries:
    for entry in entries:
        if entry.is_file():
            print(entry.name)

001_Basics.ipynb
002_Datetime_basics.ipynb
003_datetime_advance.ipynb
004_Sets_ops and simple math_ops.ipynb
005_Variable scope and binding.ipynb
006_Conditionals_Loops.ipynb
007_Arrays.ipynb
008_dictionary.ipynb
009_Lists.ipynb
010_list_comprehensions.ipynb
011_Tuples.ipynb
012_files_and_folders.ipynb
README.md


In [None]:
"""
Function 	Description
os.remove() 	Deletes a file and does not delete directories
os.unlink() 	Is identical to os.remove() and deletes a single file
pathlib.Path.unlink() 	Deletes a file and cannot delete directories
os.rmdir() 	Deletes an empty directory
pathlib.Path.rmdir() 	Deletes an empty directory
shutil.rmtree() 	Deletes entire directory tree and can be used to delete non-empty directories
"""

In [None]:
"""
Copying Files in Python - 

shutil offers a couple of functions for copying files. The most commonly used functions are shutil.copy() and shutil.copy2(). 
To copy a file from one location to another using shutil.copy(), do the following:

import shutil

src = 'path/to/file.txt'
dst = 'path/to/dest_dir'
shutil.copy(src, dst)

To preserve all file metadata when copying, use shutil.copy2():

import shutil

src = 'path/to/file.txt'
dst = 'path/to/dest_dir'
shutil.copy2(src, dst)
Using .copy2() preserves details about the file such as last access time, permission bits, last modification time, and flags.

Copying Directories - 

import shutil
shutil.copytree('data_1', 'data1_backup')
'data1_backup'

 .copytree() copies the contents of data_1 to a new location data1_backup and returns the destination directory. The destination directory must not already exist. 
 It will be created as well as missing parent directories. 
 

To move a file or directory to another location, use shutil.move(src, dst). 

>>> import shutil
>>> shutil.move('dir_1/', 'backup/')
'backup'

os.rename('first.zip', 'first_01.zip')

"""


