# Using Python to Interact with the Operating System

In [2]:
%matplotlib inline
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sys
import re

## Week 2 managing files with Python

read and write csv files exercise

In [1]:
# q1
import os
import csv

# Create a file with data in it
def create_file(filename):
  """Write the sample flower table (a header plus five rows) to `filename`.

  The path is opened in "w" mode, so any existing content is replaced.
  """
  sample_lines = (
    "name,color,type\n",
    "carnation,pink,annual\n",
    "daffodil,yellow,perennial\n",
    "iris,blue,perennial\n",
    "poinsettia,red,perennial\n",
    "sunflower,yellow,annual\n",
  )
  with open(filename, "w") as out:
    out.writelines(sample_lines)


# Read the file contents and format the information about each row
def contents_of_file(filename):
  """Return one "a <color> <name> is <type>" line per flower in `filename`.

  The file is first (re)written with the sample data by create_file(), so
  whatever was at `filename` before the call is overwritten.
  """
  # Regenerate the sample data before reading it back
  create_file(filename)

  sentences = []
  with open(filename) as source:
    # DictReader uses the first row as the keys of every following row
    for record in csv.DictReader(source):
      sentences.append(
        "a {} {} is {}\n".format(record["color"], record["name"], record["type"])
      )
  return "".join(sentences)


# Write flowers.csv (relative path) and print one sentence per flower row
print(contents_of_file("flowers.csv"))

a pink carnation is annual
a yellow daffodil is perennial
a blue iris is perennial
a red poinsettia is perennial
a yellow sunflower is annual



In [None]:
# q2
import os
import csv

# Create a file with data in it
def create_file(filename):
  """Create (or overwrite) `filename` with the sample flower CSV.

  The first line written is the header; the five lines after it are data rows.
  """
  header = "name,color,type\n"
  flowers = [
    "carnation,pink,annual\n",
    "daffodil,yellow,perennial\n",
    "iris,blue,perennial\n",
    "poinsettia,red,perennial\n",
    "sunflower,yellow,annual\n",
  ]
  with open(filename, "w") as handle:
    handle.write(header)
    for entry in flowers:
      handle.write(entry)

# Read the file contents and format the information about each row
def contents_of_file(filename):
  """Return one "a <color> <flower> is <type>" line per data row of `filename`.

  The file is first (re)written with the sample data by create_file(). Rows
  are read positionally with csv.reader, and the first row (the header) is
  left out of the result.
  """
  # Regenerate the sample data before reading it back
  create_file(filename)

  formatted = []
  with open(filename) as source:
    for index, row in enumerate(csv.reader(source)):
      # Every row, header included, must unpack into exactly three fields
      flower, color, freq = row
      if index > 0:
        # index 0 is the header row; only the rows after it are formatted
        formatted.append("a {} {} is {}\n".format(color, flower, freq))
  return "".join(formatted)

# Write flowers.csv (relative path) and print one sentence per data row
print(contents_of_file("flowers.csv"))

## Week 3 regular expressions

### 3.1 basic regular expressions

In [2]:
import re

In [5]:
# re.search reports only the first occurrence: "aza" starts at index 2 here
print(re.search(r"aza", "ceazar"))
# "aza" occurs at index 3 and again at index 7; only the one at 3 is returned
print(re.search(r"aza", "cewazaraza"))

<re.Match object; span=(2, 5), match='aza'>
<re.Match object; span=(3, 6), match='aza'>


In [8]:
# [a-z]* cannot cross the space, so the match ends at the "n" of "Python"
print(re.search(r"Py[a-z]*n", "Python Programming"))
# With A-Z and space in the class, the greedy * runs on to the last "n"
print(re.search(r"Py[a-zA-Z ]*n", "Python Programming"))

<re.Match object; span=(0, 6), match='Python'>
<re.Match object; span=(0, 17), match='Python Programmin'>


 Exercises

q1 The check_web_address function checks if the text passed qualifies as a top-level web address, meaning that it contains alphanumeric characters (which includes letters, numbers, and underscores), as well as periods, dashes, and a plus sign, followed by a period and a character-only top-level domain such as ".com", ".info", ".edu", etc. Fill in the regular expression to do that, using escape characters, wildcards, repetition qualifiers, beginning and end-of-line characters, and character classes.

In [11]:
import re
def check_web_address(text):
  """Return True if `text` as a whole looks like a top-level web address.

  Accepted form: one or more word characters (letters, digits, underscore),
  periods, dashes or plus signs, followed by a period and a letters-only
  top-level domain such as ".com" or ".info".
  """
  # ^ and $ anchor both ends: without ^, re.search would accept any text that
  # merely ends in an address, e.g. "my site.com" or "http://example.com".
  pattern = r"^[\w\.\-\+]+[\.][a-zA-Z]+$"
  return re.search(pattern, text) is not None

In [12]:
# Letters, then a period, then a letters-only domain: prints True
print(check_web_address("gmail.com"))

True


q2 The check_time function checks for the time format of a 12-hour clock, as follows: the hour is between 1 and 12, with no leading zero, followed by a colon, then minutes between 00 and 59, then an optional space, and then AM or PM, in upper or lower case. Fill in the regular expression to do that. How many of the concepts that you just learned can you use here?

In [13]:
import re
def check_time(text):
  """Return True if `text` contains a 12-hour clock time.

  Accepted form: hour 1-12 with no leading zero, a colon, minutes 00-59,
  at most one space, then am/pm/AM/PM.
  """
  # (?<!\d)       the hour may not be the tail of a longer number, so
  #               "13:00pm" and "09:59 AM" are rejected
  # (1[0-2]|[1-9]) exactly the hours 1..12
  # :             the colon is mandatory ("945pm" is not a time)
  # " ?"          at most one space before the meridiem
  # \b            the meridiem must end the word
  pattern = r"(?<!\d)(1[0-2]|[1-9]):[0-5][0-9] ?(am|pm|AM|PM)\b"
  return re.search(pattern, text) is not None

# Sample inputs; the trailing comment on each line is the expected result
print(check_time("12:45pm")) # True
print(check_time("9:59 AM")) # True
print(check_time("6:60am")) # False
print(check_time("five o'clock")) # False

True
True
False
False


q3 The contains_acronym function checks the text for the presence of 2 or more characters or digits surrounded by parentheses, with at least the first character in uppercase (if it's a letter), returning True if the condition is met, or False otherwise. For example, "Instant messaging (IM) is a set of communication technologies used for text-based communication" should return True since (IM) satisfies the match conditions. Fill in the regular expression in this function:

In [14]:
import re
def contains_acronym(text):
  """Return True if `text` contains a parenthesised acronym.

  An acronym here is "(" + an uppercase letter or digit + one or more
  letters or digits + ")", so at least two characters sit between the
  parentheses.
  """
  acronym_re = re.compile(r"\([A-Z0-9][a-zA-Z0-9]+\)")
  return acronym_re.search(text) is not None

# Sample inputs; the trailing comment on each line is the expected result
print(contains_acronym("Instant messaging (IM) is a set of communication technologies used for text-based communication")) # True
print(contains_acronym("American Standard Code for Information Interchange (ASCII) is a character encoding standard for electronic communication")) # True
print(contains_acronym("Please do NOT enter without permission!")) # False
print(contains_acronym("PostScript is a fourth-generation programming language (4GL)")) # True
print(contains_acronym("Have fun using a self-contained underwater breathing apparatus (Scuba)!")) # True

True
True
False
True
True


q6 Fill in the code to check if the text passed includes a possible U.S. zip code, formatted as follows: exactly 5 digits, and sometimes, but not always, followed by a dash with 4 more digits. The zip code needs to be preceded by at least one space, and cannot be at the start of the text.

In [15]:
import re
def check_zip_code (text):
  """Return True if `text` contains a possible U.S. zip code.

  A zip code is exactly five digits, optionally followed by a dash and four
  more digits. It must be preceded by a space, so a code at the very start
  of the text does not count.
  """
  # "?" permits at most one "-dddd" suffix (the previous "*" allowed many).
  # (?![0-9]) stops the first five digits of a longer number such as
  # " 123456" from being accepted as a zip code.
  result = re.search(r" [0-9]{5}(?:-[0-9]{4})?(?![0-9])", text)
  return result is not None

# Sample inputs; the trailing comment on each line is the expected result
print(check_zip_code("The zip codes for New York are 10001 thru 11104.")) # True
print(check_zip_code("90210 is a TV show")) # False
print(check_zip_code("Their address is: 123 Main Street, Anytown, AZ 85258-0001.")) # True
print(check_zip_code("The Parliament of Canada is at 111 Wellington St, Ottawa, ON K1A0A9.")) # False

True
False
True
False


### 3.2 advanced regular expressions

In [8]:
import re
def rearrange_name(name):
  """Turn "Last, First" into "First Last".

  Each part may contain word characters, periods, spaces and dashes. When
  `name` does not have the "<part>, <part>" shape it is returned unchanged.
  """
  match = re.search(r"^([\w\. \-]*), ([\w\. \-]*)$", name)
  if match is not None:
    last, first = match.groups()
    return "{} {}".format(first, last)
  return name

# A middle initial with a period and space is allowed in the first-name part
name = rearrange_name("Kennedy, John F.")
print(name)
# A dash is allowed in the first-name part as well
name = rearrange_name("Kennedy, John-F.")
print(name)

John F. Kennedy
John-F. Kennedy


In [5]:
# Three comma-separated parts, each captured by its own group
name = "Alice, Chen, Liu"
# The last group also allows periods and spaces; the first two are \w only
result = re.search(r"^(\w*), (\w*), ([\w\.\ ]*)$", name)
print(result)
# groups() returns the three captures as a tuple (result would be None,
# and this line would raise, if the pattern had not matched)
print(result.groups())

<re.Match object; span=(0, 16), match='Alice, Chen, Liu'>
('Alice', 'Chen', 'Liu')


Exercises

Q2 Return all words with 3 or more consecutive vowels.

In [2]:
import re
def multi_vowel_words(text):
  """Return every word in `text` with three or more consecutive vowels.

  Vowels are a, e, i, o, u in either case, so "QUEUE" is found as well as
  "queue". Words are returned in order of appearance; the list is empty
  when nothing matches.
  """
  # \b...\b keeps the whole word around the vowel run. The class lists both
  # cases explicitly instead of using re.IGNORECASE, which would also let
  # "i" match non-ASCII letters such as the dotless i.
  pattern = r"\b\w*[aeiouAEIOU]{3,}\w*\b"
  return re.findall(pattern, text)

# Sample inputs; the comment under each call is the expected list
print(multi_vowel_words("Life is beautiful")) 
# ['beautiful']

print(multi_vowel_words("Obviously, the queen is courageous and gracious.")) 
# ['Obviously', 'queen', 'courageous', 'gracious']

print(multi_vowel_words("The rambunctious children had to sit quietly and await their delicious dinner.")) 
# ['rambunctious', 'quietly', 'delicious']

print(multi_vowel_words("The order of a data queue is First In First Out (FIFO)")) 
# ['queue']

print(multi_vowel_words("Hello world!")) 
# []

['beautiful']
['Obviously', 'queen', 'courageous', 'gracious']
['rambunctious', 'quietly', 'delicious']
['queue']
[]


Q5 US phone number conversion

In [21]:
# Deliberate non-match: the first group of digits has four digits, not three
phone = "1123-123-1234"
# \b ... \b requires the ddd-ddd-dddd number to start and end on a word boundary
regex = r"(\b[\d]{3})-([\d]{3})-([\d]{4}\b)"

# No three-digit run at a word boundary is followed by "-ddd-dddd",
# so search() returns None
result = re.search(regex, phone)
print(result)
# Left commented out: result is None here, so .groups() would raise
#print(result.groups())

# With nothing to replace, sub() returns the input string unchanged
result2 = re.sub(regex, r"(\1) \2-\3", phone)
print(result2)

None
1123-123-1234


In [22]:
import re
def convert_phone_number(phone):
  """Rewrite every ddd-ddd-dddd number in `phone` as (ddd) ddd-dddd.

  The number must start and end on a word boundary, so digit runs of the
  wrong length are left as they are. Text without a match is returned
  unchanged.
  """
  dashed_number = re.compile(r"\b([\d]{3})-([\d]{3})-([\d]{4}\b)")
  return dashed_number.sub(r"(\1) \2-\3", phone)

# Sample inputs; the trailing comment on each line is the expected output
print(convert_phone_number("My number is 212-345-9999.")) # My number is (212) 345-9999.
print(convert_phone_number("Please call 888-555-1234")) # Please call (888) 555-1234
print(convert_phone_number("123-123-12345")) # 123-123-12345
print(convert_phone_number("Phone number of Buckingham Palace is +44 303 123 7300")) # Phone number of Buckingham Palace is +44 303 123 7300

My number is (212) 345-9999.
Please call (888) 555-1234
123-123-12345
Phone number of Buckingham Palace is +44 303 123 7300


## Week 4 managing data and processes

### 4.1 data streams

In [4]:
# input() returns the typed text as a str (no numeric conversion happens here);
# the prompt ends in "\n", so the entry is typed on the line below it
my_number = input('Please Enter a Number: \n')

Please Enter a Number: 
97


In [5]:
# Needs my_number from the input() cell; fails on a fresh kernel otherwise
print(my_number)

97
