## The os package

#### Absolute vs relative paths

In [None]:
import os

# Current working directory as absolute path
current_dir = os.getcwd()

# Relative path to a file in the current directory
rel_path = 'myfolder/testfile.txt'

# Construct the absolute path by combining the current directory and the relative path
abs_path = os.path.join(current_dir, rel_path)

print("Current Directory:", current_dir)
print("Relative Path:", rel_path)
print("Absolute Path:", abs_path)

In [None]:
abs_path = os.path.abspath(rel_path)
rel_path = os.path.relpath(abs_path)

#### Working directory

In [2]:
import os

# Get current working directory
cwd = os.getcwd()
print("Current working directory:", cwd)

# Change working directory 2 folders up
os.chdir("..")

# Get current working directory
cwd = os.getcwd()
print("Current working directory:", cwd)

# Set the working directory back to what it was
os.chdir("chapter06")

# Get current working directory
cwd = os.getcwd()
print("Current working directory:", cwd)

Current working directory: D:\projecten\syntra\data science\github 23-24\github\chapter06
Current working directory: D:\projecten\syntra\data science\github 23-24\github
Current working directory: D:\projecten\syntra\data science\github 23-24\github\chapter06


In [6]:
import os

# Create a directory
new_dir = "new_directory"
if os.path.exists(new_dir):
    print(f"{new_dir} already exists!")
else:
    os.mkdir(new_dir)

# Rename a file
os.rename("old_file.txt", "new_file.txt")

In [5]:
# Rename the file back
os.rename("new_file.txt", "old_file.txt")

# Delete a directory
os.rmdir(new_dir)

In [7]:
print(os.listdir())

['.ipynb_checkpoints', 'myfolder', 'new_directory', 'new_file.txt', 'working with data.ipynb']


#### Executing system commands

In [10]:
# Join path components
path = os.path.join("dir1", "dir2", "file.txt")
print("Joined path:", path)

# Get basename of a path
basename = os.path.basename("/path/to/file.txt")
print("Basename:", basename)

# Get directory name of a path
dirname = os.path.dirname("/path/to/file.txt")
print("Directory name:", dirname)

Joined path: dir1\dir2\file.txt
Basename: file.txt
Directory name: /path/to


## Working with text files

In [11]:
with open("example.txt", "r") as file:
    content = file.read()
    print(content)

Why, hello there.


In [13]:
with open("example_write.txt", "w") as file:
    file.write("Hello, World!")

In [14]:
with open("example_write.txt", "a") as file:
    file.write("\n Hello, new line!")

In [15]:
file_name = "example_write.txt"

if not os.path.exists(file_name):
    with open(file_name, "w") as file:
        file.write("Hello, World!")
else:
    print(f"The file '{file_name}' already exists. It was not overwritten.")

The file 'example_write.txt' already exists. It was not overwritten.


#### JSON files

In [20]:
import json

# Load the JSON settings file
settings = dict()

# Access and modify settings
settings['max_iterations'] = 1000
settings['learning_rate'] = 0.001

# Save the updated settings to the JSON file
with open('settings.json', 'w') as file:
    json.dump(settings, file, indent=4)
    
with open('settings.json') as file:
    settings_loaded = json.load(file)

#### Working with tabular data

In [22]:
import pandas as pd

# Reading an Excel file
data = pd.read_excel("data.xlsx", sheet_name="Sheet1")

# Printing the contents
display(data)

Unnamed: 0,city,zip code
0,Putte,2580


In [26]:
import xlsxwriter

# Creating a new Excel file
workbook = xlsxwriter.Workbook("output.xlsx")

# Adding a worksheet
worksheet = workbook.add_worksheet()

# Writing data to the worksheet
worksheet.write("A1", "Hello")
worksheet.write("B1", "World!")

# Closing the workbook
workbook.close()

#### Pickle

In [29]:
# Import the pickle module
import pickle

# Data to be saved in the pickle file
data = {"name": "John", 
            "age": 30, 
            "city": "New York"}

# Save data to a pickle file
with open("data.pickle", "wb") as file:
    pickle.dump(data, file)

# Load data from the pickle file
with open("data.pickle", "rb") as file:
    loaded_data = pickle.load(file)

print(loaded_data) 
# Output: {"name": "John", 
#            "age": 30, 
#            "city": "New York"}

{'name': 'John', 'age': 30, 'city': 'New York'}


#### Working with audio files

In [31]:
import os
from pydub import AudioSegment
import speech_recognition as sr

In [55]:
import pygame.mixer

# Initialize the mixer module
pygame.mixer.init()

# Load an audio file
sound = pygame.mixer.Sound("audio.wav")  # Replace with your audio file path

# Play the audio file
sound.play()

# Wait for the audio to finish playing
pygame.time.wait(sound.get_length() * 1000)  # Convert to milliseconds

# Quit the mixer module
pygame.mixer.quit()

ModuleNotFoundError: No module named 'pygame'

In [36]:
filename = "audio.mp3"
print(os.path.exists(filename))
with open(filename) as audio_file:
    audio = AudioSegment.from_mp3(audio_file)

True


UnicodeDecodeError: 'charmap' codec can't decode byte 0x90 in position 140: character maps to <undefined>

#### Working with image files

In [48]:
from PIL import Image

# Load the image
image = Image.open("pickle_rick.png")

# Display the image
image.show()

In [51]:
# Convert to grayscale
grayscale_image = image.convert("L")

# Display the grayscale image
grayscale_image.show()

In [52]:
# Convert to binary image
threshold = 128
binary_image = grayscale_image.point(lambda x: 0 if x < threshold else 1, "1")

# Display the binary image
binary_image.show()

In [54]:
import numpy as np
bin_matrix = np.asarray(binary_image)
print(bin_matrix)

gray_matrix = np.asarray(grayscale_image)
print(gray_matrix)

rgb_matrix = np.asarray(image)
print(rgb_matrix)

[[ True  True  True ...  True  True  True]
 [ True  True  True ...  True  True  True]
 [ True  True  True ...  True  True  True]
 ...
 [ True  True  True ...  True  True  True]
 [ True  True  True ...  True  True  True]
 [ True  True  True ...  True  True  True]]
[[255 255 255 ... 255 255 255]
 [255 255 255 ... 255 255 255]
 [255 255 255 ... 255 255 255]
 ...
 [255 255 255 ... 255 255 255]
 [255 255 255 ... 255 255 255]
 [255 255 255 ... 255 255 255]]
[[[255 255 255]
  [255 255 255]
  [255 255 255]
  ...
  [255 255 255]
  [255 255 255]
  [255 255 255]]

 [[255 255 255]
  [255 255 255]
  [255 255 255]
  ...
  [255 255 255]
  [255 255 255]
  [255 255 255]]

 [[255 255 255]
  [255 255 255]
  [255 255 255]
  ...
  [255 255 255]
  [255 255 255]
  [255 255 255]]

 ...

 [[255 255 255]
  [255 255 255]
  [255 255 255]
  ...
  [255 255 255]
  [255 255 255]
  [255 255 255]]

 [[255 255 255]
  [255 255 255]
  [255 255 255]
  ...
  [255 255 255]
  [255 255 255]
  [255 255 255]]

 [[255 255 255]
  

#### Web scraping with Python

In [57]:
import requests
from bs4 import BeautifulSoup

# Send an HTTP request to the website
url = "https://example.com"
response = requests.get(url)

# Parse the HTML content using BeautifulSoup
soup = BeautifulSoup(response.content, "html.parser")

# Show the HTML content
print(soup.prettify())

In [59]:
print(soup.prettify())

<!DOCTYPE html>
<html>
 <head>
  <title>
   Example Domain
  </title>
  <meta charset="utf-8"/>
  <meta content="text/html; charset=utf-8" http-equiv="Content-type"/>
  <meta content="width=device-width, initial-scale=1" name="viewport"/>
  <style type="text/css">
   body {
        background-color: #f0f0f2;
        margin: 0;
        padding: 0;
        font-family: -apple-system, system-ui, BlinkMacSystemFont, "Segoe UI", "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
        
    }
    div {
        width: 600px;
        margin: 5em auto;
        padding: 2em;
        background-color: #fdfdff;
        border-radius: 0.5em;
        box-shadow: 2px 3px 7px 2px rgba(0,0,0,0.02);
    }
    a:link, a:visited {
        color: #38488f;
        text-decoration: none;
    }
    @media (max-width: 700px) {
        div {
            margin: 0 auto;
            width: auto;
        }
    }
  </style>
 </head>
 <body>
  <div>
   <h1>
    Example Domain
   </h1>
   <p>
    This dom

In [60]:
import re

# Define a regex pattern
pattern = r'\d+'  # Matches one or more digits

# Input text
text = "The price is $10 for 2 items."

# Search for the pattern
matches = re.findall(pattern, text)

# Print the matches
print(matches)  # Output: ['10', '2']

['10', '2']
