In [1]:
from wrappers.linguist_wrapper import detect_linguist
from wrappers.enry_wrapper import detect_enry

import os
from collections import defaultdict

In [2]:
# load paths to data
paths = defaultdict(list)
data_path = "./test_data/"

for dir in os.listdir(data_path):
    for file in os.listdir(data_path + dir):
        paths[dir].append(data_path + dir + "/" + file)

In [3]:
ground_truth = []
paths_flat = []

for k in paths:
    for p in paths[k]:
        ground_truth.append(k)
        paths_flat.append(p)

preds_ling = [p.decode("utf-8") for p in detect_enry(paths_flat)]
preds_enry = [p.decode("utf-8") for p in detect_enry(paths_flat)]

In [4]:
# wrongly classified 
# enry: 197

wrongly_classified_ling = []
wrongly_classified_enry = []
for idx, t in enumerate(ground_truth):
    if t != preds_ling[idx]:
        wrongly_classified_ling.append(idx)
    if t != preds_enry[idx]:
        wrongly_classified_enry.append(idx)

In [5]:
len(wrongly_classified_ling), len(wrongly_classified_enry)

(193, 193)

In [6]:
for l, e in zip(wrongly_classified_ling, wrongly_classified_enry):
    if l != e:
        print(l, e)

In [7]:
from pygments.lexers import guess_lexer

text = """package Others;

/**
 * Dijkstra's algorithm,is a graph search algorithm that solves the single-source shortest path
 * problem for a graph with nonnegative edge path costs, producing a shortest path tree.
 *
 * <p>NOTE: The inputs to Dijkstra's algorithm are a directed and weighted graph consisting of 2 or
 * more nodes, generally represented by an adjacency matrix or list, and a start node.
 *
 * <p>Original source of code: https://rosettacode.org/wiki/Dijkstra%27s_algorithm#Java Also most of
 * the comments are from RosettaCode.
 */
import java.util.*;

public class Dijkstra {
  private static final Graph.Edge[] GRAPH = {
    // Distance from node "a" to node "b" is 7.
    // In the current Graph there is no way to move the other way (e,g, from "b" to "a"),
    // a new edge would be needed for that
    new Graph.Edge("a", "b", 7),
    new Graph.Edge("a", "c", 9),
    new Graph.Edge("a", "f", 14),
    new Graph.Edge("b", "c", 10),
    new Graph.Edge("b", "d", 15),
    new Graph.Edge("c", "d", 11),
    new Graph.Edge("c", "f", 2),
    new Graph.Edge("d", "e", 6),
    new Graph.Edge("e", "f", 9),
  };
  private static final String START = "a";
  private static final String END = "e";

  /** main function Will run the code with "GRAPH" that was defined above. */
  public static void main(String[] args) {
    Graph g = new Graph(GRAPH);
    g.dijkstra(START);
    g.printPath(END);
    // g.printAllPaths();
  }
}

class Graph {
  // mapping of vertex names to Vertex objects, built from a set of Edges
  private final Map<String, Vertex> graph;

  /** One edge of the graph (only used by Graph constructor) */
  public static class Edge {
    public final String v1, v2;
    public final int dist;

    public Edge(String v1, String v2, int dist) {
      this.v1 = v1;
      this.v2 = v2;
      this.dist = dist;
    }
  }

  /** One vertex of the graph, complete with mappings to neighbouring vertices */
  public static class Vertex implements Comparable<Vertex> {
    public final String name;
    // MAX_VALUE assumed to be infinity
    public int dist = Integer.MAX_VALUE;
    public Vertex previous = null;
    public final Map<Vertex, Integer> neighbours = new HashMap<>();

    public Vertex(String name) {
      this.name = name;
    }

    private void printPath() {
      if (this == this.previous) {
        System.out.printf("%s", this.name);
      } else if (this.previous == null) {
        System.out.printf("%s(unreached)", this.name);
      } else {
        this.previous.printPath();
        System.out.printf(" -> %s(%d)", this.name, this.dist);
      }
    }

    public int compareTo(Vertex other) {
      if (dist == other.dist) return name.compareTo(other.name);

      return Integer.compare(dist, other.dist);
    }

    @Override
    public boolean equals(Object object) {
      if (this == object) return true;
      if (object == null || getClass() != object.getClass()) return false;
      if (!super.equals(object)) return false;

      Vertex vertex = (Vertex) object;

      if (dist != vertex.dist) return false;
      if (name != null ? !name.equals(vertex.name) : vertex.name != null) return false;
      if (previous != null ? !previous.equals(vertex.previous) : vertex.previous != null)
        return false;
      if (neighbours != null ? !neighbours.equals(vertex.neighbours) : vertex.neighbours != null)
        return false;

      return true;
    }

    @Override
    public int hashCode() {
      int result = super.hashCode();
      result = 31 * result + (name != null ? name.hashCode() : 0);
      result = 31 * result + dist;
      result = 31 * result + (previous != null ? previous.hashCode() : 0);
      result = 31 * result + (neighbours != null ? neighbours.hashCode() : 0);
      return result;
    }

    @Override
    public String toString() {
      return "(" + name + ", " + dist + ")";
    }
  }

  /** Builds a graph from a set of edges */
  public Graph(Edge[] edges) {
    graph = new HashMap<>(edges.length);

    // one pass to find all vertices
    for (Edge e : edges) {
      if (!graph.containsKey(e.v1)) graph.put(e.v1, new Vertex(e.v1));
      if (!graph.containsKey(e.v2)) graph.put(e.v2, new Vertex(e.v2));
    }

    // another pass to set neighbouring vertices
    for (Edge e : edges) {
      graph.get(e.v1).neighbours.put(graph.get(e.v2), e.dist);
      // graph.get(e.v2).neighbours.put(graph.get(e.v1), e.dist); // also do this for an undirected
      // graph
    }
  }

  /** Runs dijkstra using a specified source vertex */
  public void dijkstra(String startName) {
    if (!graph.containsKey(startName)) {
      System.err.printf("Graph doesn't contain start vertex \"%s\"%n", startName);
      return;
    }
    final Vertex source = graph.get(startName);
    NavigableSet<Vertex> q = new TreeSet<>();

    // set-up vertices
    for (Vertex v : graph.values()) {
      v.previous = v == source ? source : null;
      v.dist = v == source ? 0 : Integer.MAX_VALUE;
      q.add(v);
    }

    dijkstra(q);
  }

  /** Implementation of dijkstra's algorithm using a binary heap. */
  private void dijkstra(final NavigableSet<Vertex> q) {
    Vertex u, v;
    while (!q.isEmpty()) {
      // vertex with shortest distance (first iteration will return source)
      u = q.pollFirst();
      if (u.dist == Integer.MAX_VALUE)
        break; // we can ignore u (and any other remaining vertices) since they are unreachable

      // look at distances to each neighbour
      for (Map.Entry<Vertex, Integer> a : u.neighbours.entrySet()) {
        v = a.getKey(); // the neighbour in this iteration

        final int alternateDist = u.dist + a.getValue();
        if (alternateDist < v.dist) { // shorter path to neighbour found
          q.remove(v);
          v.dist = alternateDist;
          v.previous = u;
          q.add(v);
        }
      }
    }
  }

  /** Prints a path from the source to the specified vertex */
  public void printPath(String endName) {
    if (!graph.containsKey(endName)) {
      System.err.printf("Graph doesn't contain end vertex \"%s\"%n", endName);
      return;
    }

    graph.get(endName).printPath();
    System.out.println();
  }

  /** Prints the path from the source to every vertex (output order is not guaranteed) */
  public void printAllPaths() {
    for (Vertex v : graph.values()) {
      v.printPath();
      System.out.println();
    }
  }
}"""

guess_lexer(text).name

'Python'

In [8]:
from guesslang import Guess

guess = Guess()

name = guess.language_name("""package Others;

/**
 * Dijkstra's algorithm,is a graph search algorithm that solves the single-source shortest path
 * problem for a graph with nonnegative edge path costs, producing a shortest path tree.
 *
 * <p>NOTE: The inputs to Dijkstra's algorithm are a directed and weighted graph consisting of 2 or
 * more nodes, generally represented by an adjacency matrix or list, and a start node.
 *
 * <p>Original source of code: https://rosettacode.org/wiki/Dijkstra%27s_algorithm#Java Also most of
 * the comments are from RosettaCode.
 */
import java.util.*;

public class Dijkstra {
  private static final Graph.Edge[] GRAPH = {
    // Distance from node "a" to node "b" is 7.
    // In the current Graph there is no way to move the other way (e,g, from "b" to "a"),
    // a new edge would be needed for that
    new Graph.Edge("a", "b", 7),
    new Graph.Edge("a", "c", 9),
    new Graph.Edge("a", "f", 14),
    new Graph.Edge("b", "c", 10),
    new Graph.Edge("b", "d", 15),
    new Graph.Edge("c", "d", 11),
    new Graph.Edge("c", "f", 2),
    new Graph.Edge("d", "e", 6),
    new Graph.Edge("e", "f", 9),
  };
  private static final String START = "a";
  private static final String END = "e";

  /** main function Will run the code with "GRAPH" that was defined above. */
  public static void main(String[] args) {
    Graph g = new Graph(GRAPH);
    g.dijkstra(START);
    g.printPath(END);
    // g.printAllPaths();
  }
}

class Graph {
  // mapping of vertex names to Vertex objects, built from a set of Edges
  private final Map<String, Vertex> graph;

  /** One edge of the graph (only used by Graph constructor) */
  public static class Edge {
    public final String v1, v2;
    public final int dist;

    public Edge(String v1, String v2, int dist) {
      this.v1 = v1;
      this.v2 = v2;
      this.dist = dist;
    }
  }

  /** One vertex of the graph, complete with mappings to neighbouring vertices */
  public static class Vertex implements Comparable<Vertex> {
    public final String name;
    // MAX_VALUE assumed to be infinity
    public int dist = Integer.MAX_VALUE;
    public Vertex previous = null;
    public final Map<Vertex, Integer> neighbours = new HashMap<>();

    public Vertex(String name) {
      this.name = name;
    }

    private void printPath() {
      if (this == this.previous) {
        System.out.printf("%s", this.name);
      } else if (this.previous == null) {
        System.out.printf("%s(unreached)", this.name);
      } else {
        this.previous.printPath();
        System.out.printf(" -> %s(%d)", this.name, this.dist);
      }
    }

    public int compareTo(Vertex other) {
      if (dist == other.dist) return name.compareTo(other.name);

      return Integer.compare(dist, other.dist);
    }

    @Override
    public boolean equals(Object object) {
      if (this == object) return true;
      if (object == null || getClass() != object.getClass()) return false;
      if (!super.equals(object)) return false;

      Vertex vertex = (Vertex) object;

      if (dist != vertex.dist) return false;
      if (name != null ? !name.equals(vertex.name) : vertex.name != null) return false;
      if (previous != null ? !previous.equals(vertex.previous) : vertex.previous != null)
        return false;
      if (neighbours != null ? !neighbours.equals(vertex.neighbours) : vertex.neighbours != null)
        return false;

      return true;
    }

    @Override
    public int hashCode() {
      int result = super.hashCode();
      result = 31 * result + (name != null ? name.hashCode() : 0);
      result = 31 * result + dist;
      result = 31 * result + (previous != null ? previous.hashCode() : 0);
      result = 31 * result + (neighbours != null ? neighbours.hashCode() : 0);
      return result;
    }

    @Override
    public String toString() {
      return "(" + name + ", " + dist + ")";
    }
  }

  /** Builds a graph from a set of edges */
  public Graph(Edge[] edges) {
    graph = new HashMap<>(edges.length);

    // one pass to find all vertices
    for (Edge e : edges) {
      if (!graph.containsKey(e.v1)) graph.put(e.v1, new Vertex(e.v1));
      if (!graph.containsKey(e.v2)) graph.put(e.v2, new Vertex(e.v2));
    }

    // another pass to set neighbouring vertices
    for (Edge e : edges) {
      graph.get(e.v1).neighbours.put(graph.get(e.v2), e.dist);
      // graph.get(e.v2).neighbours.put(graph.get(e.v1), e.dist); // also do this for an undirected
      // graph
    }
  }

  /** Runs dijkstra using a specified source vertex */
  public void dijkstra(String startName) {
    if (!graph.containsKey(startName)) {
      System.err.printf("Graph doesn't contain start vertex \"%s\"%n", startName);
      return;
    }
    final Vertex source = graph.get(startName);
    NavigableSet<Vertex> q = new TreeSet<>();

    // set-up vertices
    for (Vertex v : graph.values()) {
      v.previous = v == source ? source : null;
      v.dist = v == source ? 0 : Integer.MAX_VALUE;
      q.add(v);
    }

    dijkstra(q);
  }

  /** Implementation of dijkstra's algorithm using a binary heap. */
  private void dijkstra(final NavigableSet<Vertex> q) {
    Vertex u, v;
    while (!q.isEmpty()) {
      // vertex with shortest distance (first iteration will return source)
      u = q.pollFirst();
      if (u.dist == Integer.MAX_VALUE)
        break; // we can ignore u (and any other remaining vertices) since they are unreachable

      // look at distances to each neighbour
      for (Map.Entry<Vertex, Integer> a : u.neighbours.entrySet()) {
        v = a.getKey(); // the neighbour in this iteration

        final int alternateDist = u.dist + a.getValue();
        if (alternateDist < v.dist) { // shorter path to neighbour found
          q.remove(v);
          v.dist = alternateDist;
          v.previous = u;
          q.add(v);
        }
      }
    }
  }

  /** Prints a path from the source to the specified vertex */
  public void printPath(String endName) {
    if (!graph.containsKey(endName)) {
      System.err.printf("Graph doesn't contain end vertex \"%s\"%n", endName);
      return;
    }

    graph.get(endName).printPath();
    System.out.println();
  }

  /** Prints the path from the source to every vertex (output order is not guaranteed) */
  public void printAllPaths() {
    for (Vertex v : graph.values()) {
      v.printPath();
      System.out.println();
    }
  }
}""")

print(name)

Java
