<a href="https://colab.research.google.com/github/pkaiser8/info-664-final/blob/main/PK_final_draft.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Three (or Fewer) Randomized Records from the Collection of the Cooper Hewitt, Smithsonian Design Museum

In [None]:
# Import the libraries used below (pandas is third-party; random is part of the standard library):
import pandas as pd
import random

# Establish file path for .CSV data sheet:
data_filepath = '/content/objects-refined.csv'

def load_data(data_filepath):
    """
    Read the collection CSV into a pandas DataFrame.

    Inputs:
        data_filepath: The file or URL path to the CSV file.

    Returns:
        The loaded DataFrame.
    """
    # low_memory=False makes pandas parse the file in one pass rather than
    # in internal chunks.
    return pd.read_csv(data_filepath, low_memory=False)

def group_data(data_filepath, group_column):
    """
    Groups the collection records by the given key columns.

    Inputs:
        data_filepath: The file or URL path to the CSV file. An already
            loaded DataFrame is also accepted, which avoids reading the
            file a second time.
        group_column: The key column (or list of key columns) to group
            the data by.

    Returns:
        The grouped DataFrame (a pandas DataFrameGroupBy object).
    """
    # Take the data from the argument instead of relying on a module-level
    # objects_df variable, which may not exist when this function is called.
    if isinstance(data_filepath, pd.DataFrame):
        objects_df = data_filepath
    else:
        # low_memory=False makes pandas parse the file in one pass.
        objects_df = pd.read_csv(data_filepath, low_memory=False)

    # Use the groupby() method to group the rows in the DataFrame
    # based on the values in the user defined key columns.
    return objects_df.groupby(group_column)

def _prompt_for_key(prompt, columns_by_name):
    """Ask for a key until the entry matches one of the known column names."""
    while True:
        # The prompt tells the user to copy what is between the quotes, so
        # tolerate surrounding whitespace and accidentally pasted quotes.
        entered = input(prompt).strip().strip('\'"').strip()
        print()
        if entered in columns_by_name:
            return columns_by_name[entered]
        print(f'"{entered}" is not one of the keys listed above. Please try again.\n')


def get_user_input(objects_df):
    """
    Pulls in user input to establish key selection.

    Prints the available column names, then asks the user for two of them.
    Each entry is validated against the DataFrame's columns and the user is
    asked again until a valid key is entered, so the returned keys can be
    passed to groupby() without raising a KeyError.

    Inputs:
        objects_df: The DataFrame whose columns are offered as keys.

    Returns:
        tuple: The two selected column names, in the order entered.
    """
    # Allow the user to define the desired output by selecting from the key columns outlined in the .CSV file used:
    print('Welcome to the Cooper Hewitt, Smithsonian Design Museum collections object randomizer. \nPlease see below a list of keys used to define objects in the collection .CSV file:\n')
    # Prints the .CSV columns so the user can decide which to input:
    print(objects_df.columns)
    print()
    print('Select which keys you would like to pair together to randomly find three or less records which share \ncommon values from these elements.\nNote: Some key combinations work better than others. See which ones yield the best results\n')

    # input() always yields text, so match on the text form of each column
    # label and hand back the original label.
    columns_by_name = {str(column): column for column in objects_df.columns}

    # User inputted information for each desired key:
    user_selected_key_1 = _prompt_for_key(
        'Please enter one of the keys listed above. Remember to include everything between the quotes:\n',
        columns_by_name,
    )
    user_selected_key_2 = _prompt_for_key('Please enter a second key:\n', columns_by_name)
    print(f'You have selected "{user_selected_key_1}" and "{user_selected_key_2}" as your grouped keys.')
    return user_selected_key_1, user_selected_key_2

# Load the collection data first so its column names can be shown to the user:
objects_df = load_data(data_filepath)

# Run the function to get user input for key selection
user_selected_key_1, user_selected_key_2 = get_user_input(objects_df)
group_column = [user_selected_key_1, user_selected_key_2]

Welcome to the Cooper Hewitt, Smithsonian Design Museum collections object randomizer. 
Please see below a list of keys used to define objects in the collection .CSV file:

Index(['accession_number', 'creditline', 'date', 'decade', 'department_id',
       'description', 'dimensions', 'dimensions_raw', 'gallery_text', 'id',
       'inscribed', 'is_active', 'is_loan_object', 'justification',
       'label_text', 'markings', 'media_id', 'medium', 'on_display',
       'period_id', 'primary_image', 'primary_image2', 'provenance', 'signed',
       'title', 'title_raw', 'tms:id', 'tombstone', 'type', 'type_id', 'url',
       'videos', 'woe:country', 'woe:country_id', 'woe:country_name',
       'year_acquired', 'year_end', 'year_start'],
      dtype='object')

Select which keys you would like to pair together to randomly find three or less records which share 
common values from these elements.
Note: Some key combinations work better than others. See which ones yield the best results



In [284]:
# Target number of records to return per run. A group holding fewer records
# than this yields all of its records instead:
num_records = 3

def select_random_group(grouped_df, num_records=3):
    """
    Picks one group key at random from the grouped DataFrame.

    Inputs:
      grouped_df: The grouped DataFrame.
      num_records (int, optional): Not used by this function. Defaults to 3.

    Returns:
      The randomly chosen group key, taken from grouped_df.groups.
    """
    # Collect every unique group key, then draw one of them at random.
    candidate_keys = list(grouped_df.groups.keys())
    return random.choice(candidate_keys)

def select_random_records(grouped_df, num_records=3, selected_group_key=None):
    """
    Selects up to num_records random records from the given group.

    Input:
        grouped_df: The grouped DataFrame.
        num_records (int, optional): The number of records to select. Defaults to 3.
        selected_group_key (optional): The key of the selected group. When it
            is not passed, the module-level selected_group_key variable is
            used, so existing calls that only pass grouped_df and num_records
            keep working.

    Returns:
        DataFrame: The randomly selected records. If the group holds fewer
        than num_records records, all of them are returned. An empty
        DataFrame is returned (and a message printed) when no group key is
        available or no records exist for it.
    """
    if selected_group_key is None:
        # Backward compatibility: earlier call sites set selected_group_key
        # as a module-level variable instead of passing it in.
        selected_group_key = globals().get("selected_group_key")

    selected_group_records = None
    if selected_group_key is not None:
        try:
            # Extracts whole records belonging to the selected group
            selected_group_records = grouped_df.get_group(selected_group_key)
        except KeyError:
            # Unknown key: reported through the shared message below.
            pass

    if selected_group_records is None:
        print(f"Could not find records since one group is blank, please run this cell again.\nYou may also reselect your two keys in the cell above and rerun both cells.\n")
        return pd.DataFrame()  # Empty DataFrame signals that no records were found

    if len(selected_group_records) >= num_records:
        # The group is large enough: draw exactly num_records at random.
        return selected_group_records.sample(n=num_records)

    # The group has fewer than num_records records, so return all of them.
    return selected_group_records

# Group the collection by the two keys chosen in the cell above:
grouped_df = group_data(data_filepath, group_column)

# Pick one group at random, then sample up to num_records records from it.
# select_random_records reads the module-level selected_group_key set here.
selected_group_key = select_random_group(grouped_df)
random_records = select_random_records(grouped_df, num_records)

print("Selected Group Key:", selected_group_key)
if not random_records.empty:
  print("Randomly Selected Records:")
  print()
  print(random_records)

Selected Group Key: ('Fragment', 'Germany')
Randomly Selected Records:

       accession_number                     creditline               date  \
111828       1902-1-886   Gift of John Pierpont Morgan  13th–14th century   
148274       1956-201-1  Gift of John Davis Hatch, Jr.       17th century   
116411       1902-1-932   Gift of John Pierpont Morgan  13th–15th century   

        decade  department_id  \
111828     NaN       35347501   
148274     NaN       35347501   
116411     NaN       35347501   

                                              description  \
111828  Fragment of printed linen with twining lizard-...   
148274  Damask with a stylized floral border surroundi...   
116411  Repeating pattern in silver printed on a deep ...   

                                               dimensions  dimensions_raw  \
111828           H x W: 35 x 22.2 cm (13 3/4 x 8 3/4 in.)             NaN   
148274  Warp x Weft: 182 x 69 cm (5 ft. 11 5/8 in. x 2...             NaN   
116411    

In [285]:
# Create a dictionary for each selected record and compile them into a list.
# Columns are looked up by name rather than by position so the result does not
# depend on the column order of the .CSV file.
record_fields = {
    'Image': 'primary_image',
    'Title': 'title',
    'Date': 'date',
    'Medium': 'medium',
    'Dimensions': 'dimensions',
    'Type': 'type',
    'Country': 'woe:country_name',
    'Accession Number': 'accession_number',
}

# One dictionary per record; the list is empty when random_records is empty.
all_records_data = [
    {label: row[column_name] for label, column_name in record_fields.items()}
    for _, row in random_records.iterrows()
]

In [297]:
def generate_html_table(random_records):
    """
    Generates an HTML table to visually display the selected records.

    Each record becomes one table row showing its image thumbnail (linked to
    the full size picture), title, date, medium, dimensions, type, country
    and accession number.

    Inputs:
        random_records: DataFrame of the records to display. Columns are
            looked up by name; a column that is absent is shown as blank.

    Returns:
        str: The HTML markup for the table. With an empty DataFrame the
        table contains only the header row.
    """
    # Imported locally so the function does not depend on a file-level import.
    from html import escape

    # Cooper Hewitt logo, used when a record has no image of its own.
    placeholder_image = 'https://upload.wikimedia.org/wikipedia/commons/thumb/f/fa/Cooper_Hewitt%2C_Smithsonian_Design_Museum_logo.svg/320px-Cooper_Hewitt%2C_Smithsonian_Design_Museum_logo.svg.png'
    header_style = "border: 2px dotted #fff; padding: 8px;"
    cell_style = "border: 1px solid #ddd; padding: 8px;"

    # Table heading -> source column, in display order. 'Image' must stay first.
    field_columns = {
        'Image': 'primary_image',
        'Title': 'title',
        'Date': 'date',
        'Medium': 'medium',
        'Dimensions': 'dimensions',
        'Type': 'type',
        'Country': 'woe:country_name',
        'Accession Number': 'accession_number',
    }

    def as_text(value):
        """Return the value as display text; missing values become ''."""
        if value is None or (pd.api.types.is_scalar(value) and pd.isna(value)):
            return ""
        return str(value)

    header_cells = "".join(
        f"<th style='{header_style}'>{label}</th>" for label in field_columns
    )
    parts = [
        "<table style='border-collapse: separate; border-spacing: 10px; border: 2px solid #ddd;'>",
        f"<tr>{header_cells}</tr>",
    ]

    for _, row in random_records.iterrows():
        values = {
            label: as_text(row.get(column_name))
            for label, column_name in field_columns.items()
        }

        # Fall back to the placeholder when the image value is missing or empty.
        # The link sits inside single-quoted attributes, so quotes are escaped too.
        image_link = escape(values.pop('Image').strip() or placeholder_image, quote=True)

        # Wrap the image in an <a> tag so the thumbnail opens the full size
        # picture in a separate tab.
        cells = [
            f"<td style='{cell_style}'><a href='{image_link}' target='_blank'>"
            f"<img src='{image_link}' width='100'></a></td>"
        ]
        # Escape the remaining values so characters such as & or < taken from
        # the data cannot break the markup.
        cells.extend(
            f"<td style='{cell_style}'>{escape(text)}</td>" for text in values.values()
        )
        parts.append("<tr>" + "".join(cells) + "</tr>")

    parts.append("</table>")
    return "\n".join(parts)

# Import HTML and display from IPython.display. display() is imported
# explicitly rather than relying on the notebook making it available.
# Source: https://ipython.readthedocs.io/en/8.26.0/api/generated/IPython.display.html
from IPython.display import HTML, display

html_table = generate_html_table(random_records)
display(HTML(html_table))

Image,Title,Date,Medium,Dimensions,Type,Country,Accession Number
,"Fragment (Germany), 13th–14th century",13th–14th century,Linen,H x W: 35 x 22.2 cm (13 3/4 x 8 3/4 in.),Fragment,Germany,1902-1-886
,"Fragment (Germany), 17th century",17th century,Linen,Warp x Weft: 182 x 69 cm (5 ft. 11 5/8 in. x 27 3/16 in.),Fragment,Germany,1956-201-1
,"Fragment (Germany), 13th–15th century",13th–15th century,"Linen, tin",H x W: 27.6 x 20 cm (10 7/8 x 7 7/8 in.),Fragment,Germany,1902-1-932
