#  Python for data science
###  python is best learned through types
###  4 basic types   -  int, float, str,  bool  
###  5 iterable types  - list, set, tuple,  dict,  range  
###  you can get the type of a variable  with the type() function ,  and list the properties/methods with the dir()  function.   
###  print(x,y,z)  print()  allows printing multiple variables.

#  C and Java use arrays which have homogeneous types in a contiguous block of memory.  Python uses lists,  which allow heterogeneous types. 

##  for x in y  or  "for in" loops    

In [12]:
# a single list can hold an int, a bool, a float and a str side by side
mylist = [1,True,3.14,"hello"]
for item in mylist:   # item is bound to each element in turn
    print(item)

1
True
3.14
hello


### .split() is a string (class 'str')   method.  it can be used as a shortcut for creating lists of strings

In [135]:
# split() with no arguments breaks the string on whitespace and returns the pieces as a list
mywords = "one two three four".split()
# one print call, one value per line; len() gives the number of items in an iterable
print(mywords, type(mywords), len(mywords), sep="\n")

['one', 'two', 'three', 'four']
<class 'list'>
4


In [48]:
# split() returns a list, so the loop can iterate over its result directly
for item in "here are some words".split():
    print(item)

here
are
some
words


### 'str' object has many properties and methods, 81 as is shown by the len() function.  

In [127]:
# dir() returns the attribute names of its argument as a list of strings
list_of_properties = dir(str)
print(type(list_of_properties))
print(f"{len(list_of_properties)} total properties ")      # f-strings,  introduced in python 3.6 .
print("excluding dunder items:")
for name in list_of_properties:
    # skip the double-underscore ("dunder") names and show only the rest
    if name.startswith('__'):
        continue
    print(name)

<class 'list'>
81 total properties 
excluding dunder items:
capitalize
casefold
center
count
encode
endswith
expandtabs
find
format
format_map
index
isalnum
isalpha
isascii
isdecimal
isdigit
isidentifier
islower
isnumeric
isprintable
isspace
istitle
isupper
join
ljust
lower
lstrip
maketrans
partition
removeprefix
removesuffix
replace
rfind
rindex
rjust
rpartition
rsplit
rstrip
split
splitlines
startswith
strip
swapcase
title
translate
upper
zfill


#  in performs a membership test

In [56]:
# "not in" is the negated membership test, so the two branches are written in the opposite order
if 'strip' not in list_of_properties:
    print("didn't find it in the list")    # a single quote can sit inside a double-quoted string
else:
    print("yes, found strip in the list")

yes, found strip in the list


###  what was just before it and just after?

In [59]:
# index() returns the position of the first matching item
foundat = list_of_properties.index('strip')
# pick out the neighbours on either side of that position
prev_name = list_of_properties[foundat-1]
next_name = list_of_properties[foundat+1]
print(f" found it at position {foundat} between {prev_name} and {next_name}  ")

 found it at position 75 between startswith and swapcase  


# range(start, stop) creates a range object,  which can then be iterated over.  

In [10]:
range(0,5)   # creates range object  

range(0, 5)

In [11]:
type(range(5))

range

In [7]:
# range(3) yields 0, 1, 2 -- the stop value itself is excluded
for i in range(3):
    print(i)

0
1
2


# objects can be cast to other types

In [16]:
myrange = range(0,3)
mylist = list(myrange)   # list() walks the range and stores each value
# show each object next to its type
for obj in (myrange, mylist):
    print(obj, type(obj))

range(0, 3) <class 'range'>
[0, 1, 2] <class 'list'>


In [15]:
myset = set(myrange)   # set() accepts any iterable, here the range built in an earlier cell
print(myset, type(myset))

{0, 1, 2} <class 'set'>


In [21]:
myset[0]  #  sets are not subscriptable: there is no lookup by position

TypeError: 'set' object is not subscriptable

# dictionaries use {} like sets, but consist of key:value pairs.  

In [22]:
mydict = {7:8, 9:10}   # perfectly valid to use integers for keys  

In [25]:
mydict[0]  # dictionaries only care about keys, not positional indexes

KeyError: 0

In [26]:
mydict[7]  # there is a 7 key  

8

## dictionaries are lookup tables

In [None]:
# a small lookup table: state abbreviation -> capital city
statecapitols = {
    "CA": "Sacramento",
    "OR": "Salem",
    "WA": "Olympia",
}

###  because regular lists would be mutable copies, .keys() and .values() instead return special read-only view objects (dict_keys and dict_values) that track the dictionary.  there are many, many types that can be iterated over.

In [60]:
# keys() and values() print as dict_keys(...) / dict_values(...) rather than as plain lists
print(statecapitols.keys())
print(statecapitols.values())

dict_keys(['CA', 'OR', 'WA'])
dict_values(['Sacramento', 'Salem', 'Olympia'])


## these can be cast to regular lists. 

In [63]:
# list() turns each view into an ordinary list
listkeys, listvalues = list(statecapitols.keys()), list(statecapitols.values())
print(f"there are {len(listkeys)} keys and {len(listvalues)} values")
print(listkeys, listvalues, sep="\n")

there are 3 keys and 3 values
['CA', 'OR', 'WA']
['Sacramento', 'Salem', 'Olympia']


## dict_keys and dict_values can be iterated over just like regular lists. 

In [28]:
# walk the keys explicitly, looking up each value by its key
for key in statecapitols.keys():
    print(key, statecapitols[key])

CA Sacramento
OR Salem
WA Olympia


##  .keys() is assumed when just the dictionary is given

In [29]:
# iterating the dictionary itself yields its keys, same as .keys()
for key in statecapitols:
    print(key, statecapitols[key])

CA Sacramento
OR Salem
WA Olympia


## .items() returns a dict_items view of (key, value) tuples, which can be cast to a list of tuples.  python allows for nested containers.

In [65]:
aslist = list(statecapitols.items())   # materialize the (key, value) pairs as a real list
print(type(aslist), aslist, sep="\n")

<class 'list'>
[('CA', 'Sacramento'), ('OR', 'Salem'), ('WA', 'Olympia')]


## the list can be iterated over, returning a tuple each time. 

In [47]:
for atuple in statecapitols.items():
    print(atuple)

('CA', 'Sacramento')
('OR', 'Salem')
('WA', 'Olympia')


## since python allows for multiple assignment,  the tuple can be unpacked each loop, making the code very readable

In [31]:
# each (key, value) tuple from items() is unpacked straight into two loop variables
for key,value in statecapitols.items():
    print(key,value)

CA Sacramento
OR Salem
WA Olympia


In [40]:
print(aslist[0]) 
print(type(aslist[0]))

('CA', 'Sacramento')
<class 'tuple'>


In [41]:
print(aslist[0][1]) 
print(type(aslist[0][1]))

Sacramento
<class 'str'>


##  for cases when the positional index is needed, enumerate()  takes an iterable and yields an (index, value) tuple for each item

In [78]:
enumerateobj = enumerate([1,2,3])
print(enumerateobj)           # some classes will just show the address of the object in memory
indexed_pairs = list(enumerateobj)
print(indexed_pairs)          # casting it to a list makes it readable

<enumerate object at 0x7fdf97b28c20>
[(0, 1), (1, 2), (2, 3)]


In [80]:
for index,item in enumerate(statecapitols.items()):  # items() is called explicitly so that item is a (key, value) tuple
    print(index,item)

0 ('CA', 'Sacramento')
1 ('OR', 'Salem')
2 ('WA', 'Olympia')


###  output is a list of 2-tuples (index,item) where item is a 2-tuple as well.  if I try to unpack it as a single tuple, I get a ValueError

In [83]:
# each element from enumerate() is (index, (key, value)): only two values at the top level,
# so asking for three names fails
for index,key,value in enumerate(statecapitols.items()):
    print(index,key,value)

ValueError: not enough values to unpack (expected 3, got 2)

###  I can unpack inner and outer tuples at once

In [84]:
# the nested target mirrors the nested shape of each element: (index, (key, value))
for (index,(key,value)) in enumerate(statecapitols.items()):
    print(index,key,value)

0 CA Sacramento
1 OR Salem
2 WA Olympia


In [85]:
list(enumerate(statecapitols))   # enumerating a dictionary assumes keys() by default

[(0, 'CA'), (1, 'OR'), (2, 'WA')]

#  strings (class 'str') can use single or double quotes.  """ triple quotes allow multi-line strings.

In [43]:
"hello " + 'world'  

'hello world'

##  a gotcha of strings is that they are iterable.

In [133]:
wordlist = "here are some words".split()
print(wordlist,type(wordlist))
# a list of strings is like a 2 dimensional array of characters:
# looping over one element of the list walks that string one character at a time
first_word = wordlist[0]
for letter in first_word:
    print(letter, type(letter))

['here', 'are', 'some', 'words'] <class 'list'>
h <class 'str'>
e <class 'str'>
r <class 'str'>
e <class 'str'>


In [94]:
datestr, recipient = "Oct 1, 2024", "resident at 2424 Oak Dr"
text = "you may already be a winner!"

# a triple-quoted f-string spans several lines and fills in each {name} from the variables above
mytemplate = f"""
   {datestr}
   Dear {recipient},  
   {text} 
"""
print(mytemplate)


   Oct 1, 2024
   Dear resident at 2424 Oak Dr,  
   you may already be a winner! 



###  sets allow union, intersection, subtraction, and symmetric difference. I can use subtraction to filter out methods from an ancestor class

In [87]:
# the attribute names of the base object class, held as a set so that set arithmetic can be used on them
rootmethodset = set(dir(object))
print(rootmethodset)

{'__gt__', '__lt__', '__doc__', '__reduce_ex__', '__sizeof__', '__ne__', '__getattribute__', '__le__', '__repr__', '__eq__', '__init_subclass__', '__dir__', '__reduce__', '__new__', '__ge__', '__delattr__', '__class__', '__setattr__', '__hash__', '__str__', '__init__', '__getstate__', '__subclasshook__', '__format__'}


In [88]:
listmethodset = set(dir(list))
print( listmethodset - rootmethodset )   # set difference: names on list that object does not have

{'insert', 'append', 'sort', '__reversed__', 'index', '__len__', '__iter__', 'count', '__mul__', 'pop', '__getitem__', '__setitem__', '__delitem__', 'clear', '__iadd__', '__rmul__', 'extend', 'remove', '__class_getitem__', '__contains__', '__imul__', 'reverse', 'copy', '__add__'}


In [89]:
setmethodset = set(dir(set))
print( setmethodset - rootmethodset )

{'__sub__', '__ior__', 'union', 'difference', '__rxor__', 'discard', 'update', '__or__', 'issubset', '__len__', '__rsub__', '__iter__', 'add', '__ixor__', 'difference_update', 'symmetric_difference', 'pop', 'intersection_update', '__iand__', 'intersection', 'clear', 'isdisjoint', '__isub__', 'symmetric_difference_update', 'remove', '__and__', '__class_getitem__', '__contains__', 'copy', '__xor__', '__rand__', 'issuperset', '__ror__'}


##  what do lists and sets have in common, aside from what all types have?  

In [91]:
print( (listmethodset & setmethodset) - rootmethodset )   # & is intersection, - is difference

{'clear', '__len__', '__iter__', 'copy', 'remove', 'pop', '__contains__', '__class_getitem__'}


###  what do lists and dicts have in common, aside from what all types have?

In [92]:
dictmethodset = set(dir(dict))
print( (listmethodset & dictmethodset) - rootmethodset )

{'__setitem__', '__delitem__', 'clear', '__len__', '__iter__', 'copy', 'pop', '__getitem__', '__contains__', '__class_getitem__', '__reversed__'}


In [95]:
from random import shuffle, choices, sample

#  shuffle will randomly reorder an existing list in place. 

In [96]:
mylist = list(range(10))   # the integers 0 through 9, in order
print(mylist)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]


In [97]:
shuffle(mylist)   # reorders mylist in place; nothing is assigned back
print(mylist)

[6, 3, 1, 0, 5, 8, 7, 2, 4, 9]


### you need a list for shuffle to work.   but the errors aren't always obvious.  

In [105]:
shuffle(listmethodset)  # shuffling a set raises TypeError: shuffle indexes into its argument, and sets are not subscriptable

TypeError: 'set' object is not subscriptable

In [100]:
mytuple = tuple(range(10))    # tuple() accepts any iterable, just as list() does
print(type(mytuple), mytuple, sep="\n")

<class 'tuple'>
(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)


In [107]:
shuffle(mytuple) # shuffling a tuple raises a different TypeError: a tuple can be indexed, but shuffle also needs item assignment, which tuples do not support

TypeError: 'tuple' object does not support item assignment

##  the shuffled list can now be sorted  with the sorted() function,  which creates a NEW list, and does not modify the input one.  

In [108]:
# sorted() builds a brand-new list, so it is safe to call before printing the input
new_ascending_sorted_list = sorted(mylist)
print(mylist)                      # still in shuffled order
print(new_ascending_sorted_list)

[6, 3, 1, 0, 5, 8, 7, 2, 4, 9]
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]


In [137]:
# NOTE(review): 'papa' comes before 'oscar' here, and the trailing '...' becomes its own list item
# once the string is split -- presumably left that way for the sorting demo below; confirm
alphabet = "alpha bravo charlie delta echo foxtrot golf hotel indigo juliet kilo lima mike november papa oscar quebec ... "  

In [122]:
# what happens here?  a temporary list is passed to shuffle, so no name is left through which
# to see the shuffled list.  shuffle always returns None, so that is what result holds.
result = shuffle(alphabet.split())

In [124]:
print(result)

None


In [138]:
alpha_list = alphabet.split()   # keep a name for the list so the shuffled result stays reachable
print(alpha_list)
shuffle(alpha_list)
print(alpha_list)

['alpha', 'bravo', 'charlie', 'delta', 'echo', 'foxtrot', 'golf', 'hotel', 'indigo', 'juliet', 'kilo', 'lima', 'mike', 'november', 'papa', 'oscar', 'quebec', '...']
['mike', 'foxtrot', 'kilo', 'charlie', 'oscar', 'alpha', 'echo', 'indigo', 'quebec', '...', 'golf', 'hotel', 'papa', 'delta', 'november', 'lima', 'bravo', 'juliet']


##  do an inplace sort on alpha_list, like shuffle did 

In [139]:
alpha_list.sort()   # list.sort() reorders the list in place, the same way shuffle did
print(alpha_list)

['...', 'alpha', 'bravo', 'charlie', 'delta', 'echo', 'foxtrot', 'golf', 'hotel', 'indigo', 'juliet', 'kilo', 'lima', 'mike', 'november', 'oscar', 'papa', 'quebec']


##  sorted  creates a new list and does not modify the original iterable  

In [144]:
  # if I wanted a shuffled tuple, I'd have to make it a list, shuffle it, then convert back to a tuple.
# sorted() accepts any iterable -- here a tuple -- and always hands back a new list
randomtuple = (
    "it", "was", "the", "best", "of", "times",
    "it", "was", "the", "worst", "of", "times",
)
print(type(randomtuple))
forwardsortedlist = sorted(randomtuple)
print(forwardsortedlist)

<class 'tuple'>
['best', 'it', 'it', 'of', 'of', 'the', 'the', 'times', 'times', 'was', 'was', 'worst']


In [142]:
reversedlist = sorted(randomtuple, reverse=True)   # reverse=True sorts in descending order
print(reversedlist)

['worst', 'was', 'was', 'times', 'times', 'the', 'the', 'of', 'of', 'it', 'it', 'best']


In [146]:
randomtuple  # unchanged

('it',
 'was',
 'the',
 'best',
 'of',
 'times',
 'it',
 'was',
 'the',
 'worst',
 'of',
 'times')