Subregion analysis for GPS data #44

Merged: 18 commits, May 4, 2021
Changes from 4 commits
2 changes: 1 addition & 1 deletion README.md
@@ -81,7 +81,7 @@ geo_tag: Counts the number of tags from each region, as well as extracts AlexNet

geo_lng: Counts the languages that make up the image tags, and whether or not they are local to the country the image is from. Also extracts image-level features to compare whether locals and tourists portray a country differently

Note: Geography-Based analyses require a mapping from images to location. The 2 primary ways we've encountered these mappings in existing datasets are geography labels (ie. String formatted locations like 'Manhattan'), and GPS labels (latitude and longitude coordinate pairs). Our analyses supports both types of geography mappings. Namely, the user should specify in their dataset class the `geography_info_type` to be one of the following:
Note: Geography-Based analyses require a mapping from images to location. The 2 formats of geography annotations supported are geography labels (i.e., string-formatted locations like 'Manhattan') and GPS labels (latitude and longitude coordinate pairs). The user should specify in their dataset class the `geography_info_type` to be one of the following (see the sketch after this list):
- 'GPS_LABEL': datasets with mappings from image to GPS coordinates
- 'STRING_FORMATTED_LABEL', datasets with mappings from image to string-formatted labels

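For illustration, here is a minimal sketch of how a dataset class might declare these attributes; the `MyGeoDataset` class and everything in it beyond the attribute names and values listed above are hypothetical, not part of this repository's API:

```python
class MyGeoDataset:
    """Hypothetical dataset wrapper whose images carry location annotations."""

    def __init__(self):
        # Tell the geography-based analyses which annotation format is available:
        # "GPS_LABEL" for latitude/longitude coordinate pairs, or
        # "STRING_FORMATTED_LABEL" for text locations such as 'Manhattan'.
        self.geography_info_type = "GPS_LABEL"

        # For string-formatted labels, the analysis notebook additionally checks
        # geography_label_string_type, e.g. "COUNTRY_LABEL" or "REGION_LABEL".
        # self.geography_label_string_type = "REGION_LABEL"
```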
38 changes: 18 additions & 20 deletions analysis_notebooks/Geography Analysis.ipynb
@@ -724,8 +724,8 @@
"outputs": [],
"source": [
"if (dataset.geography_info_type == \"STRING_FORMATTED_LABEL\" and dataset.geography_label_string_type == \"COUNTRY_LABEL\"):\n",
" if not os.path.exists(\"results/{0}/6\".format(folder_name)):\n",
" os.mkdir(\"results/{0}/6\".format(folder_name))\n",
" if not os.path.exists(\"results/{0}/geo_tag\".format(folder_name)):\n",
" os.mkdir(\"results/{0}/geo_tag\".format(folder_name))\n",
" info_stats = pickle.load(open(\"results/{}/geo_tag.pkl\".format(folder_name), \"rb\")) #20GB\n",
" country_tags = info_stats['country_tags']\n",
" tag_to_subregion_features = info_stats['tag_to_subregion_features']\n",
@@ -741,7 +741,7 @@
" subregion_tags[subregion] = np.add(subregion_tags[subregion], counts)\n",
" total_counts = total_counts.astype(int)\n",
" sum_total_counts = int(np.sum(total_counts))\n",
" if not os.path.exists('checkpoints/{}/6_a.pkl'.format(folder_name)):\n",
" if not os.path.exists('checkpoints/{}/geo_tag_a.pkl'.format(folder_name)):\n",
" pvalues_over = {} # pvalue : '[country]: [tag] (country num and total num info for now)'\n",
" pvalues_under = {} \n",
" for country, counts in country_tags.items():\n",
@@ -760,7 +760,7 @@
" else:\n",
" pvalues_under[p] = tag_info\n",
" else:\n",
" pvalues_under, pvalues_over = pickle.load(open('checkpoints/{}/6_a.pkl'.format(folder_name), 'rb'))\n",
" pvalues_under, pvalues_over = pickle.load(open('checkpoints/{}/geo_tag_a.pkl'.format(folder_name), 'rb'))\n",
" subregion_pvalues_over = {}\n",
" subregion_pvalues_under = {}\n",
" for subregion, counts in subregion_tags.items():\n",
@@ -778,9 +778,8 @@
" subregion_pvalues_under[p] = tag_info\n",
" \n",
"elif (dataset.geography_info_type == \"STRING_FORMATTED_LABEL\" and dataset.geography_label_string_type == \"REGION_LABEL\"):\n",
" print(\"Geo_tag work for region label formatted dataset\")\n",
" if not os.path.exists(\"results/{0}/6\".format(folder_name)):\n",
" os.mkdir(\"results/{0}/6\".format(folder_name))\n",
" if not os.path.exists(\"results/{0}/geo_tag\".format(folder_name)):\n",
" os.mkdir(\"results/{0}/geo_tag\".format(folder_name))\n",
" info_stats = pickle.load(open(\"results/{}/geo_tag_region.pkl\".format(folder_name), \"rb\")) #20GB\n",
" region_tags = info_stats['region_tags']\n",
" tag_to_region_features = info_stats['tag_to_region_features']\n",
@@ -794,7 +793,7 @@
" total_counts = total_counts.astype(int)\n",
" sum_total_counts = int(np.sum(total_counts))\n",
"\n",
" if not os.path.exists('checkpoints/{}/6_a.pkl'.format(folder_name)):\n",
" if not os.path.exists('checkpoints/{}/geo_tag_a.pkl'.format(folder_name)):\n",
" pvalues_over = {} # pvalue : '[region]: [tag] (region num and total num info for now)'\n",
" pvalues_under = {} \n",
" for region, counts in region_tags.items():\n",
@@ -813,12 +812,11 @@
" else:\n",
" pvalues_under[p] = tag_info\n",
" else:\n",
" pvalues_under, pvalues_over = pickle.load(open('checkpoints/{}/6_a.pkl'.format(folder_name), 'rb'))\n",
" pvalues_under, pvalues_over = pickle.load(open('checkpoints/{}/geo_tag_a.pkl'.format(folder_name), 'rb'))\n",
"\n",
"elif dataset.geography_info_type == \"GPS_LABEL\":\n",
" print(\"Geo_tag work for gps formatted dataset\")\n",
" if not os.path.exists(\"results/{0}/6\".format(folder_name)):\n",
" os.mkdir(\"results/{0}/6\".format(folder_name))\n",
" if not os.path.exists(\"results/{0}/geo_tag\".format(folder_name)):\n",
" os.mkdir(\"results/{0}/geo_tag\".format(folder_name))\n",
" info_stats = pickle.load(open(\"results/{}/geo_tag_gps.pkl\".format(folder_name), \"rb\")) #20GB\n",
" region_tags = info_stats['region_tags']\n",
" subregion_tags = info_stats.get('subregion_tags', None)\n",
@@ -833,7 +831,7 @@
" total_counts = total_counts.astype(int)\n",
" sum_total_counts = int(np.sum(total_counts))\n",
"\n",
" if not os.path.exists('checkpoints/{}/6_a.pkl'.format(folder_name)):\n",
" if not os.path.exists('checkpoints/{}/geo_tag_a.pkl'.format(folder_name)):\n",
" pvalues_over = {} # pvalue : '[region]: [tag] (region num and total num info for now)'\n",
" pvalues_under = {} \n",
" for region, counts in region_tags.items():\n",
@@ -852,7 +850,7 @@
" else:\n",
" pvalues_under[p] = tag_info\n",
" else:\n",
" pvalues_under, pvalues_over = pickle.load(open('checkpoints/{}/6_a.pkl'.format(folder_name), 'rb'))\n",
" pvalues_under, pvalues_over = pickle.load(open('checkpoints/{}/geo_tag_a.pkl'.format(folder_name), 'rb'))\n",
"\n",
" if subregion_tags is not None:\n",
" subregion_total_counts = np.zeros(len(categories))\n",
@@ -942,7 +940,7 @@
" import warnings\n",
" warnings.filterwarnings(\"ignore\")\n",
"\n",
" if not os.path.exists('checkpoints/{}/6_b.pkl'.format(folder_name)):\n",
" if not os.path.exists('checkpoints/{}/geo_tag_b.pkl'.format(folder_name)):\n",
" phrase_to_value = {}\n",
" # Look at appearance differences in how a tag is represented across subregions\n",
" for tag in tag_to_subregion_features.keys():\n",
@@ -988,7 +986,7 @@
" fontP = FontProperties()\n",
" fontP.set_size('small')\n",
" lgd = plt.legend(handles=handles, bbox_to_anchor=(1.04,1), loc=\"upper left\", prop=fontP)\n",
" plt.savefig('results/{0}/{1}/{2}_tsne.png'.format(folder_name, 6, dataset.categories[tag]), bbox_extra_artists=(lgd,), bbox_inches='tight')\n",
" plt.savefig('results/{0}/{1}/{2}_tsne.png'.format(folder_name, \"geo_tag\", dataset.categories[tag]), bbox_extra_artists=(lgd,), bbox_inches='tight')\n",
" plt.close()\n",
"\n",
" class_preds = clf.predict(all_features)\n",
@@ -1034,10 +1032,10 @@
" phrase = dataset.labels_to_names[dataset.categories[tag]]\n",
" phrase_to_value[phrase] = [value, all_subregions[diff_subregion], acc, p_value, num_features, j_to_acc]\n",
" \n",
" pickle.dump([original_labels, class_probs, class_preds, diff_subregion, all_filepaths], open('results/{0}/{1}/{2}_info.pkl'.format(folder_name, 6, dataset.labels_to_names[dataset.categories[tag]]), 'wb'))\n",
" pickle.dump(phrase_to_value, open('checkpoints/{}/6_b.pkl'.format(folder_name), 'wb'))\n",
" pickle.dump([original_labels, class_probs, class_preds, diff_subregion, all_filepaths], open('results/{0}/{1}/{2}_info.pkl'.format(folder_name, \"geo_tag\", dataset.labels_to_names[dataset.categories[tag]]), 'wb'))\n",
" pickle.dump(phrase_to_value, open('checkpoints/{}/geo_tag_b.pkl'.format(folder_name), 'wb'))\n",
" else:\n",
" phrase_to_value = pickle.load(open('checkpoints/{}/6_b.pkl'.format(folder_name), 'rb'))\n",
" phrase_to_value = pickle.load(open('checkpoints/{}/geo_tag_b.pkl'.format(folder_name), 'rb'))\n",
"\n",
" svm_options = []\n",
" best_tag = None\n",
@@ -1055,7 +1053,7 @@
" else:\n",
" if tag is None:\n",
" return\n",
" this_info = pickle.load(open('results/{0}/{1}/{2}_info.pkl'.format(folder_name, 6, tag), 'rb'))\n",
" this_info = pickle.load(open('results/{0}/{1}/{2}_info.pkl'.format(folder_name, \"geo_tag\", tag), 'rb'))\n",
" labels, class_probs, class_preds, diff_subregion, all_filepaths = this_info\n",
" value, region, acc, p_value, num_features, j_to_acc = phrase_to_value[tag]\n",
" if num is not None:\n",
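Throughout the notebook cells above, the same caching convention is used, now keyed on the geo_tag name instead of the numeric folder 6: the statistics are computed only when the checkpoint pickle is missing, and reloaded otherwise. Below is a minimal sketch of that pattern; the load_or_compute helper and the compute_pvalues callable are hypothetical, introduced only to illustrate the flow, not part of this PR:

```python
import os
import pickle

def load_or_compute(checkpoint_path, compute_fn):
    # Hypothetical helper mirroring the notebook's caching pattern:
    # recompute only when no cached pickle exists, otherwise reload it.
    if not os.path.exists(checkpoint_path):
        result = compute_fn()
        with open(checkpoint_path, "wb") as f:
            pickle.dump(result, f)
    else:
        with open(checkpoint_path, "rb") as f:
            result = pickle.load(f)
    return result

# Usage in the spirit of the notebook's 'checkpoints/{}/geo_tag_a.pkl' cache
# (folder_name and compute_pvalues would come from the surrounding cell):
# pvalues_under, pvalues_over = load_or_compute(
#     "checkpoints/{}/geo_tag_a.pkl".format(folder_name), compute_pvalues)
```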
2 changes: 1 addition & 1 deletion measurements/geography_based.py
@@ -173,7 +173,7 @@ def geo_tag(dataloader, args):
if (dataloader.dataset.geography_info_type == "GPS_LABEL"):
print("redirecting to geo_tag_gps()...")
return geo_tag_gps(dataloader, args)
if (dataloader.dataset.geography_info_type == "STRING_FORMATTED_LABEL" and dataloader.dataset.geography_label_string_type == "REGION_LABEL"):
elif (dataloader.dataset.geography_info_type == "STRING_FORMATTED_LABEL" and dataloader.dataset.geography_label_string_type == "REGION_LABEL"):
print("redirecting to geo_tag_region()...")
return geo_tag_region(dataloader, args)
country_tags = {}