In [64]:
# What you type...   What you get   example                     print(example)
# \'                 '              'What\'s up?'               What's up?
# \"                 "              "That's \"cool\""           That's "cool"
# \\                 \              "Look, a mountain: /\\"     Look, a mountain: /\
# \n                 (newline)      "1\n2 3"                    1
#                                                               2 3

In [65]:
# In addition, Python's triple quote syntax for strings lets us include newlines literally (i.e. by just hitting 'Enter' on our keyboard, rather than using the special '\n' sequence). We've already seen this in the docstrings we use to document our functions, but we can use them anywhere we want to define a string.

triplequoted_hello = """hello
world"""
print(triplequoted_hello)


hello
world


In [66]:
# The print() function automatically adds a newline character unless we specify a value for the keyword argument end other than the default value of '\n':

print("hello")
print("world")
print("hello", end='')
print("pluto", end='')



hello
world
hellopluto

In [67]:
# Indexing
planet = 'Pluto'
planet[0]

'P'

In [68]:
# Slicing
planet[-3:]

'uto'

In [69]:
# How long is this string?
len(planet)

5

In [70]:
# Yes, we can even loop over them
#char == character
[char+'! ' for char in planet]

['P! ', 'l! ', 'u! ', 't! ', 'o! ']

In [71]:
#But a major way in which they differ from lists is that they are immutable. We can't modify them.

planet[0] = 'B'
# planet.append doesn't work either

TypeError: 'str' object does not support item assignment

In [None]:
# ALL CAPS
claim = "Pluto is a planet!"
claim.upper()

'PLUTO IS A PLANET!'

In [None]:
# all lowercase
claim.lower()

'pluto is a planet!'

In [None]:
# Searching for the first index of a substring
claim.index('plan')

11

In [None]:
claim.startswith(planet)

True

In [None]:
# False because of the missing exclamation mark
claim.endswith('planet')

False

In [None]:
claim.split()

['Pluto', 'is', 'a', 'planet!']

In [None]:
datestr = '1956-01-31'
year, month, day = datestr.split('-')
year, month, day

('1956', '01', '31')

In [None]:
'/'.join([month, day, year])

'01/31/1956'

In [None]:
# Yes, we can put unicode characters right in our string literals :)
# Split the claim into words here: no earlier cell assigns a `words` variable,
# so relying on one would raise NameError on a fresh kernel.
words = claim.split()
' 👏 '.join([word.upper() for word in words])

'PLUTO 👏 IS 👏 A 👏 PLANET!'

In [None]:
planet='pluto'
planet + ', we miss you.'

'pluto, we miss you.'

In [None]:
# If we want to throw in any non-string objects, we have to be careful to call str() on them first.
# Otherwise the concatenation fails, as this cell demonstrates:

position = 9
planet + ", you'll always be the " + position + "th planet to me."

TypeError: can only concatenate str (not "int") to str

In [None]:
# position = '9'
# planet + ", you'll always be the " + position + "th planet to me."

In [None]:
planet + ", you'll always be the " + str(position) + "th planet to me."


"pluto, you'll always be the 9th planet to me."

In [None]:
# Format
# We call .format() on a "format string", where the Python values we want to insert are represented with {} placeholders. Notice that we didn't even have to call str() to convert position from an int; format() takes care of that for us.
"{}, youll always be {}th planet to me".format(planet, position)

'Pluto, youll always be 9th planet to me'

In [74]:
pluto_mass = 1.303 * 10**22
earth_mass = 5.9722 * 10**24
population = 52910390
#         2 significant digits   3 decimal places, format as percent     separate with commas
"{} weighs about {:.2} kilograms ({:.3%} of Earth's mass). It is home to {:,} Plutonians.".format(
    planet, pluto_mass, pluto_mass / earth_mass, population)

"Pluto weighs about 1.3e+22 kilograms (0.218% of Earth's mass). It is home to 52,910,390 Plutonians."

In [75]:
# Referring to format() arguments by index, starting from 0
s = """Pluto's a {0}.
No, it's a {1}.
{0}!
{1}!""".format('planet', 'dwarf planet')
print(s)

Pluto's a planet.
No, it's a dwarf planet.
planet!
dwarf planet!


In [76]:
# Dictionaries
# Dictionaries are a built-in Python data structure for mapping keys to values.
numbers={'one':1,'two':2,'three':3}

In [77]:
numbers['one']

1

In [78]:
#We can use the same syntax to add another key, value pair

numbers['eleven'] = 11
numbers

{'one': 1, 'two': 2, 'three': 3, 'eleven': 11}

In [79]:
#Or to change the value associated with an existing key

numbers['one'] = 'Pluto'
numbers

{'one': 'Pluto', 'two': 2, 'three': 3, 'eleven': 11}

In [81]:
# Dictionary comprehensions
planets = ['Mercury', 'Venus', 'Earth', 'Mars', 'Jupiter', 'Saturn', 'Uranus', 'Neptune']
planet_to_initial={planet:planet[0] for planet in planets}
planet_to_initial

{'Mercury': 'M',
 'Venus': 'V',
 'Earth': 'E',
 'Mars': 'M',
 'Jupiter': 'J',
 'Saturn': 'S',
 'Uranus': 'U',
 'Neptune': 'N'}

In [82]:
#The in operator tells us whether something is a key in the dictionary

'Saturn' in planet_to_initial

True

In [84]:
#A for loop over a dictionary will loop over its keys

for k in numbers:
    print('{}={}'.format(k,numbers[k]))

one=Pluto
two=2
three=3
eleven=11


In [87]:
#We can access a collection of all the keys or all the values with dict.keys() and dict.values(), respectively.

# Get all the initials, sort them alphabetically, and put them in a space-separated string.
' '.join(sorted(planet_to_initial.values()))


'E J M M N S U V'

In [88]:
' '.join(sorted(planet_to_initial.values()))

'E J M M N S U V'

In [98]:
planet_to_initial.items()

dict_items([('Mercury', 'M'), ('Venus', 'V'), ('Earth', 'E'), ('Mars', 'M'), ('Jupiter', 'J'), ('Saturn', 'S'), ('Uranus', 'U'), ('Neptune', 'N')])

In [97]:
#The very useful dict.items() method lets us iterate over the keys and values of a dictionary simultaneously. (In Python jargon, an item refers to a key, value pair)
# .rjust() is right justify

# Use a dedicated loop variable so that the notebook-level `planet` string
# assigned in earlier cells is not overwritten when this loop runs.
for planet_name, initial in planet_to_initial.items():
    print("{} begins with \"{}\"".format(planet_name.rjust(10), initial))

   Mercury begins with "M"
     Venus begins with "V"
     Earth begins with "E"
      Mars begins with "M"
   Jupiter begins with "J"
    Saturn begins with "S"
    Uranus begins with "U"
   Neptune begins with "N"


In [102]:
# str.strip()
# It removes whitespace from both the start and end of a string, but not from the middle.

In [100]:
#There is a saying that "Data scientists spend 80% of their time cleaning data, and 20% of their time complaining about cleaning data." Let's see if you can write a function to help clean US zip code data. Given a string, it should return whether or not that string represents a valid zip code. For our purposes, a valid zip code is any string consisting of exactly 5 digits. HINT: str has a method that will be useful here. Use help(str) to review a list of string methods.
def is_valid_zip(zip_code):
    """Return whether the input string is a valid (5 digit) zip code.

    For our purposes a valid zip code is a string consisting of exactly five
    ASCII digits (``0``-``9``).

    Args:
        zip_code: The string to validate.

    Returns:
        True if ``zip_code`` has length 5 and every character is one of
        ``0``-``9``; False otherwise.

    >>> is_valid_zip('12345')
    True
    >>> is_valid_zip('1234')
    False
    >>> is_valid_zip('12a45')
    False
    """
    # str.isdigit() alone also accepts non-ASCII digit characters (superscripts,
    # Arabic-Indic digits, ...), which can never appear in a zip code, so each
    # character is additionally required to lie in the ASCII range '0'-'9'.
    return (
        len(zip_code) == 5
        and zip_code.isdigit()
        and all('0' <= ch <= '9' for ch in zip_code)
    )

In [104]:
# A researcher has gathered thousands of news articles. But she wants to focus her attention on articles including a specific word. Complete the function below to help her filter her list of articles.

# Your function should meet the following criteria:

# Do not include documents where the keyword string shows up only as a part of a larger word. For example, if she were looking for the keyword “closed”, you would not include the string “enclosed.”
# She does not want you to distinguish upper case from lower case letters. So the phrase “Closed the case.” would be included when the keyword is “closed”
# Do not let periods or commas affect what is matched. “It is closed.” would be included when the keyword is “closed”. But you can assume there are no other types of punctuation.

def word_search(doc_list, keyword):
    """Find the documents that contain ``keyword`` as a whole word.

    Matching ignores letter case and any periods or commas trailing a word.
    A keyword that only occurs as part of a longer word does not count.

    Args:
        doc_list: List of documents, each document being a string.
        keyword: The word to look for.

    Returns:
        List of the indices into ``doc_list`` of every document containing
        the keyword, in ascending order.

    Example:
    >>> doc_list = ["The Learn Python Challenge Casino.", "They bought a car", "Casinoville"]
    >>> word_search(doc_list, 'casino')
    [0]
    """
    def has_keyword(doc):
        # Whitespace-separated words, lowercased, with trailing '.' and ','
        # removed so that punctuation and case do not affect the comparison.
        words = [raw.rstrip('.,').lower() for raw in doc.split()]
        return keyword.lower() in words

    # Keep the position of every document in which the keyword was found.
    return [position for position, doc in enumerate(doc_list) if has_keyword(doc)]

In [105]:
# Now the researcher wants to supply multiple keywords to search for. Complete the function below to help her.

# (You're encouraged to use the word_search function you just wrote when implementing this function. Reusing code in this way makes your programs more robust and readable - and it saves typing!)

def multi_word_search(doc_list, keywords):
    """Search a list of documents for several keywords at once.

    Args:
        doc_list: List of documents, each document being a string.
        keywords: List of keywords to look for.

    Returns:
        A dictionary where each key is a keyword and the value is the list of
        indices (from ``doc_list``) of the documents containing that keyword,
        as determined by ``word_search``.

    >>> doc_list = ["The Learn Python Challenge Casino.", "They bought a car and a casino", "Casinoville"]
    >>> keywords = ['casino', 'they']
    >>> multi_word_search(doc_list, keywords)
    {'casino': [0, 1], 'they': [1]}
    """
    # Search the doc_list argument itself. The previous code referenced an
    # unrelated name (`documents`) that is not defined by this function.
    return {keyword: word_search(doc_list, keyword) for keyword in keywords}