## 1. Using subprocess and os

**Handling output streams**

In [2]:
import subprocess

# Run a command that is expected to fail and keep its stderr in a file.
try:
    result = subprocess.run(
        ["ls", "non_existent_directory"],
        check=True,  # a non-zero exit status raises CalledProcessError
        text=True,  # captured streams are decoded to str
        capture_output=True,
    )
except subprocess.CalledProcessError as failure:
    # The exception carries the captured stderr of the failed command.
    with open("error_log.txt", "w") as log_handle:
        log_handle.write(failure.stderr)


Command '['ls', 'non_existent_directory']' returned non-zero exit status 1.


**Pipelines**

In [5]:
import subprocess

# Build the equivalent of `ls -l | grep git_examples.ipynb`:
# list the current directory and keep only the lines that mention that file.
ls_proc = subprocess.Popen(["ls", "-l"], stdout=subprocess.PIPE)
grep_proc = subprocess.Popen(["grep", "git_examples.ipynb"], stdin=ls_proc.stdout, stdout=subprocess.PIPE)

ls_proc.stdout.close()  # Allow ls_proc to receive a SIGPIPE if grep_proc exits
# stderr is not piped for grep_proc, so `errors` is always None here.
output, errors = grep_proc.communicate()  # Capture output
ls_proc.wait()  # Reap the producer so it does not linger as a zombie process

print(output.decode())  # Print the filtered output


-rw-r--r--@ 1 sergiopaniegoblanco  staff   1842 Sep 30 15:55 git_examples.ipynb



**Asynchronous command execution**

Running multiple commands in parallel and waiting for their completion

In [13]:
import asyncio
import nest_asyncio

# Allow nested event loops
nest_asyncio.apply()

async def run_command(command):
    """Run `command` (an argv sequence) as a subprocess and return its decoded stdout."""
    child = await asyncio.create_subprocess_exec(
        *command,
        stdout=asyncio.subprocess.PIPE,
    )
    # communicate() returns (stdout, stderr); only stdout is piped here.
    captured = (await child.communicate())[0]
    return captured.decode()

async def main():
    """Start the demo commands together and print their outputs in command order."""
    commands = [["echo", "Hello"], ["sleep", "2"], ["echo", "World"]]
    pending = [run_command(argv) for argv in commands]
    # gather() returns results in the same order as the awaitables it was given.
    results = await asyncio.gather(*pending)
    print("\n".join(results))

# Drive the coroutine to completion. The nest_asyncio.apply() call earlier in this
# cell is there to allow nested event loops, i.e. calling asyncio.run() from a notebook.
asyncio.run(main())


Hello


World



**Environment variables**

In [None]:
import os
import subprocess

# Prepend a custom directory to PATH for the child process only.
my_env = os.environ.copy()  # work on a copy so this process's environment is untouched
# os.pathsep is ":" on POSIX and ";" on Windows. Dropping an empty or missing PATH
# avoids both a KeyError and a dangling separator at the end of the value.
my_env["PATH"] = os.pathsep.join(
    part for part in ("/custom/path", my_env.get("PATH", "")) if part
)

result = subprocess.run(
    ["my_command"],
    env=my_env,
    capture_output=True,
    text=True
)

print(result.stdout)


**Error handling and retries**

In [None]:
import subprocess
import time

def run_with_retries(command, retries=3, delay=1):
    """Run `command`, retrying when it exits with a non-zero status.

    Args:
        command: Argument list passed to ``subprocess.run``.
        retries: Maximum number of attempts.
        delay: Seconds to wait between two attempts.

    Returns:
        The captured standard output (bytes) of the first successful attempt.

    Raises:
        RuntimeError: If every attempt fails (or ``retries`` is less than 1).
            It is chained to the last ``CalledProcessError`` when there is one.
    """
    last_error = None
    for attempt in range(retries):
        try:
            result = subprocess.run(command, check=True, capture_output=True)
            return result.stdout
        except subprocess.CalledProcessError as e:
            last_error = e
            print(f"Attempt {attempt + 1} failed: {e}")
            if attempt + 1 < retries:
                time.sleep(delay)  # Wait before retrying, but not after the final attempt
    # RuntimeError is an Exception subclass, so existing `except Exception` callers still work.
    raise RuntimeError("Command failed after multiple attempts.") from last_error

# Fetch a page with curl through the retry helper; the helper returns raw bytes,
# hence the decode() before printing.
output = run_with_retries(["curl", "http://example.com"])
print(output.decode())


## 2. Files and directories manipulation

**File creation**

In [23]:
# Create (or overwrite) example.txt with a single greeting.
with open("example.txt", "w") as handle:
    handle.write("Hello, World!")


**Copying a file**

In [24]:
import shutil
# Duplicate example.txt; shutil.copy returns the destination path, which the
# notebook displays as the cell output.
shutil.copy("example.txt", "example_copy.txt")


'example_copy.txt'

**Removing a file**

In [25]:
import os
# Delete the copy; os.remove raises FileNotFoundError if the file is not there.
os.remove("example_copy.txt")

**Creating and removing directories**

In [29]:
# exist_ok=True makes this a no-op (instead of an error) when the directory already exists.
os.makedirs("example_dir", exist_ok=True)

In [30]:
# Remove the directory together with everything inside it.
shutil.rmtree("example_dir")

**Reading log files**

In [36]:
# Echo every line of the log with surrounding whitespace removed.
with open("logfile.log", "r") as log_handle:
    for entry in map(str.strip, log_handle):
        print(entry)


[2024-09-30 10:01:23] INFO: System started.
[2024-09-30 10:03:11] ERROR: Failed to connect to the database.
[2024-09-30 10:04:55] INFO: Connection to the database re-established.
[2024-09-30 10:05:33] INFO: Backup started.
[2024-09-30 10:07:15] INFO: Backup completed successfully.
[2024-09-30 10:08:45] ERROR: Network timeout during sync operation.
[2024-09-30 10:09:22] INFO: Sync operation resumed.


**Filtering log entries**

In [1]:
# Show only the entries that contain the text "ERROR".
with open("logfile.log", "r") as log_handle:
    error_entries = (entry for entry in log_handle if "ERROR" in entry)
    for entry in error_entries:
        print(entry.strip())


[2024-09-30 10:03:11] ERROR: Failed to connect to the database.
[2024-09-30 10:08:45] ERROR: Network timeout during sync operation.


**Writing to log files**

In [45]:
# Append mode keeps the existing entries and adds the new one at the end.
with open("logfile.log", "a") as log_handle:
    log_handle.writelines(["New log entry\n"])


## 3. Regular expressions in Python using `re`

**Basic matching**

In [14]:
import re

text = "Hello, World!"
pattern = r"Hello"

# re.search scans the whole string, so the pattern may match at any position.
print("Match found!" if re.search(pattern, text) else "No match.")


Match found!


**Extracting digits**

In [22]:
import re

text = "The price is 42 dollars."
pattern = r"\d+"  # one or more consecutive digits

# Each element is one run of digits, in order of appearance.
digits = [hit.group() for hit in re.finditer(pattern, text)]
print("Digits found:", digits)


Digits found: ['42']


**Email validation**

In [24]:
import re

def validate_email(email):
    """Return True if `email` has the shape ``local@domain.tld``, else False.

    This is a pragmatic syntactic check, not a full RFC-compliant validation.
    """
    pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
    # fullmatch must consume the entire string. With re.match, `$` also matches
    # just before a trailing newline, so "user@example.com\n" was accepted.
    return re.fullmatch(pattern, email) is not None

# Test the function with a plain address, one missing its domain, and one with a multi-part domain
emails = ["example@gmail.com", "invalid-email@", "user.name@domain.co.uk"]
for email in emails:
    print(f"{email}: {validate_email(email)}")


example@gmail.com: True
invalid-email@: False
user.name@domain.co.uk: True


**Extracting phone numbers**

In [17]:
import re

def extract_phone_numbers(text):
    """Return every 3-3-4 digit phone number found in `text`.

    The three digit groups may be separated by '-', '.', a whitespace
    character, or nothing at all.
    """
    phone_re = re.compile(r'\b\d{3}[-.\s]?\d{3}[-.\s]?\d{4}\b')
    return [hit.group(0) for hit in phone_re.finditer(text)]

# Sample text containing phone numbers, one per supported separator: '-', '.' and a space
sample_text = "Call me at 123-456-7890 or 987.654.3210. My office number is 555 666 7777."
phone_numbers = extract_phone_numbers(sample_text)
print("Extracted Phone Numbers:", phone_numbers)


Extracted Phone Numbers: ['123-456-7890', '987.654.3210', '555 666 7777']


**Replacing words**

In [26]:
import re

def replace_word(text, old_word, new_word, whole_word=False):
    """Replace occurrences of `old_word` in `text` with `new_word`, both taken literally.

    Args:
        text: String to search.
        old_word: Literal text to look for (regex metacharacters are escaped).
        new_word: Literal replacement text.
        whole_word: If True, only replace occurrences that are not adjacent to
            another word character (so "cat" is left alone inside "concatenate").
            Defaults to False, which replaces every occurrence.

    Returns:
        The text with the replacements applied.
    """
    pattern = re.escape(old_word)
    if whole_word:
        pattern = rf'(?<!\w){pattern}(?!\w)'
    # A callable replacement is inserted verbatim. A plain string would be parsed
    # as a replacement template, so backslashes in `new_word` would be mangled
    # or rejected with re.error.
    return re.sub(pattern, lambda _match: new_word, text)

# Test the function: every "cat" in the sentence should become "dog"
original_text = "The cat sat on the mat. The cat is fat."
updated_text = replace_word(original_text, "cat", "dog")
print("Original Text:", original_text)
print("Updated Text:", updated_text)


Original Text: The cat sat on the mat. The cat is fat.
Updated Text: The dog sat on the mat. The dog is fat.


**Finding all occurrences of a pattern**

In [30]:
import re

def find_all_occurrences(text, pattern):
    """Return all non-overlapping matches of `pattern` in `text`, scanning left to right."""
    compiled = re.compile(pattern)
    return compiled.findall(text)

# Sample text; it is lower-cased before searching so that "Cats" is matched as well
sample_text = "I love cats. Cats are great pets. I also love dogs."
pattern = r'cats?'
occurrences = find_all_occurrences(sample_text.lower(), pattern)
print("Occurrences of 'cat' or 'cats':", occurrences)


Occurrences of 'cat' or 'cats': ['cats', 'cats']


**Compiling regular expressions**

In [36]:
import re

# Pre-compile the pattern so the same compiled object can be reused for several searches
pattern = re.compile(r'\d{3}-\d{2}-\d{4}')  # Social Security Number format

# Search the sample sentence with the compiled pattern
text = "My SSN is 123-45-6789."
match = pattern.search(text)
if match is None:
    print("No SSN found.")
else:
    print("Found SSN:", match.group())


Found SSN: 123-45-6789
